/[volute]/trunk/projects/std-vounits/VOUnits.tex
ViewVC logotype

Contents of /trunk/projects/std-vounits/VOUnits.tex

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2476 - (show annotations)
Fri Mar 14 11:15:32 2014 UTC (7 years, 7 months ago) by mark.beauchamp.taylor@gmail.com
File MIME type: application/x-tex
File size: 93499 byte(s)
Fix tiny typos
1 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2 % For a conversion via cgiprint (HTX):
3 % See http://vizier.u-strasbg.fr/local/man/cgiprint.htx
4 \def\ifhtx{\iffalse} % Lines used only for the HTML version
5 \ifhtx
6 % . . .
7 % . . . Definitions in HTX context
8 % . . .
9 \else
10 \documentclass[11pt,notitlepage,onecolumn]{ivoa}
11 % . . .
12 % . . . Definitions in LaTeX context
13 % . . .
14 \fi
15 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
16
17 \def\SVN$#1: #2 ${\expandafter\def\csname SVN#1\endcsname{#2}}
18 \SVN$Revision$
19 \SVN$Date$
20 \SVN$HeadURL$
21
22 \usepackage{natbib} % use author-year citations
23
24 \usepackage{prettyref} % ensure consistent cross-references
25 \newrefformat{sec}{Sect.~\ref{#1}}
26 \newrefformat{appx}{Appx.~\ref{#1}}
27 \newrefformat{fig}{Fig.~\ref{#1}}
28 \newrefformat{tab}{Table~\ref{#1}}
29 \usepackage{varioref}
30 \newrefformat{tabx}{Table~\vref{#1}}
31
32 % Extend the {tabular} column types, so we can conveniently get
33 % raggedright (ie non-insanely-spaced) column entries. Follows the
34 % excellent answer at <http://tex.stackexchange.com/questions/12703/>
35 \usepackage{array}
36 \newcolumntype{L}[1]{>{\raggedright\let\newline\\\arraybackslash\hspace{0pt}}p{#1}}
37
38
39 %\usepackage{supertabular,multicol}
40
41 % Physical units in \rm. Unstarred version includes leading
42 % \thinspace. Starred version doesn't, and is used when referring to
43 % the unit by itself (eg axis is $B/\units*T$), and is not qualifying
44 % a number
45 \makeatletter
46 \def\units{\@ifstar{\let\un@tsspace\relax \un@ts}%
47 {\let\un@tsspace\thinspace\un@ts}}
48 \newcommand{\un@ts}[1]{{\let~\thinspace
49 \ifmmode
50 \un@tsspace\mathrm{#1}%
51 \else
52 \nobreak$\un@tsspace\mathrm{#1}$%
53 \fi}}
54
55 \newcommand*\hex[1]{\uppercase{#1}${}_{16}$}
56 %\newcommand*\hex[1]{\texttt{0x#1}} % alternative formatting
57
58 \usepackage{verbatim} % for \verbatiminput
59 \def\verbatim@font{\fontsize{9}{11}\selectfont\ttfamily}
60 % \DeclareRobustCommand{\^}{%
61 % \ifmmode\nfss@text{\textasciicircum}\else\textasciicircum\fi}
62
63 %\definecolor{normative}{rgb}{0.1,0.1,0.5}
64 \newcommand*\norm[1]{\textbf{\color{ivoacolor}#1}}
65
66 \makeatother
67
68 % abbreviation for 'e.g.', which (a) gets spacing right after the full
69 % stop, and (b) allows us to change the punctuation globally if we
70 % decide to.
71 \def\eg{e.g.,~}
72
73 %%
74 %% If document is processed with latex, dvips and ps2pdf
75 %%
76 \ifx\pdftexversion\undefined
77 \usepackage[dvips]{graphicx}
78 \DeclareGraphicsExtensions{.eps,.ps}
79 %% Uncomment following line if you want PDF thumbnails
80 % \usepackage[ps2pdf]{thumbpdf}
81 % for old hyperref, use:
82 \usepackage[ps2pdf]{hyperref}
83 %% for recent hyperref, use:
84 % \usepackage[ps2pdf,bookmarks=true,bookmarksnumbered=true,hypertexnames=false,breaklinks=true,%
85 % colorlinks,linkcolor=blue,urlcolor=blue]{hyperref}
86
87 %%
88 %% else if document is processed with pdflatex
89 %%
90 \else
91 \usepackage[pdftex]{graphicx} %% graphics for pdftex (supports .pdf .jpg .png)
92 \usepackage{epstopdf} %% requires epstopdf
93 %% this is to support .ps files :
94 \makeatletter
95 \g@addto@macro\Gin@extensions{,.ps}
96 \@namedef{Gin@rule@.ps}#1{{pdf}{.pdf}{`ps2pdf #1}}
97 \makeatother
98 %% comment above lines if you have included ps files
99 %\DeclareGraphicsExtensions{.pdf,.jpg,.png}
100 %% Uncomment following line if you want PDF thumbnails
101 % \usepackage[pdftex]{thumbpdf}
102 %% for old hyperref, use:
103 % \usepackage[ps2pdf]{hyperref}
104 % for recent hyperref, use:
105 \usepackage[pdftex,bookmarks=true,bookmarksnumbered=true,hypertexnames=false,breaklinks=true,%
106 colorlinks,allcolors=ivoacolor]{hyperref}
107 \pdfadjustspacing=1
108 \fi
109 \usepackage[final]{pdfpages}
110 %\usepackage{tabulary} %%
111 %% Header of the document...
112 %%
113 % Provide a title for your document
114 \title{Units in the VO}
115 % Give date and version number
116 \date{1.0-20140226}
117
118 % Choose one document type from below
119 %\ivoatype{IVOA Note}
120 %\ivoatype{IVOA Working Draft}
121 \ivoatype{IVOA Proposed Recommendation}
122 %\ivoatype{IVOA Recommendation}
123
124 \version{1.0}
125 % Give author list: separate different authors with \\
126 % You can add email addresses with links \url{mailto:yourname@ivoa.net}
127 \author{Markus Demleitner\\
128 S\'{e}bastien Derri\`ere\\
129 Norman Gray\\
130 Mireille Louys\\
131 Fran\c{c}ois Ochsenbein}
132 \editor{S\'{e}bastien Derri\`{e}re and Norman Gray}
133
134 \urlthisversion{\footnotesize{\url{http://www.ivoa.net/Documents/VOUnits/20140226/}}}
135 \urllastversion{\footnotesize{\url{http://www.ivoa.net/Documents/VOUnits/}}}
136 \previousversion{\footnotesize{\url{http://www.ivoa.net/Documents/VOUnits/20131224/}}}
137 %\previousversion{\footnotesize{\url{http://www.ivoa.net/Documents/VOUnits/20130922/}}}
138 %\previousversion{\footnotesize{\url{http://www.ivoa.net/Documents/VOUnits/20131011/}}}
139 %\previousversion{\footnotesize{\url{http://www.ivoa.net/documents/VOUnits/20130724/PR-VOUnits-1.0-20130922.pdf}}}
140 %\previousversion{\footnotesize{\url{http://www.ivoa.net/Documents/VOUnits/20130429/}}}
141 %\previousversion{\footnotesize{\url{http://www.ivoa.net/Documents/VOUnits/20130225/}}}
142 %\previousversion{\footnotesize{\url{http://www.ivoa.net/internal/IVOA/UnitsDesc/WD-VOUnits-v1.0-20120522.pdf}}}
143 %\urlthisversion{\footnotesize{\url{http://www.ivoa.net/Documents/VOUnits/20130225/}}}
144 %\previousversion{\footnotesize{\url{http://www.ivoa.net/internal/IVOA/UnitsDesc/WD-VOUnits-v1.0-20120718.pdf}}}
145 %\previousversion{\footnotesize{\url{http://www.ivoa.net/Documents/VOUnits/20120801/}}}
146
147
148
149 %%%%%%%%%%%%%%%%%
150 %mir \documentclass[12pt]{article}
151 %\usepackage{graphicx}
152 %\usepackage{hyperref}
153 %\usepackage{psfig}
154 %\usepackage{html}
155 %\usepackage{epsf}
156 %\usepackage{lscape}
157 %mir \textheight 9.0in \hoffset -0.5in \voffset -0.5in
158 %\newcommand{\Sensitiv}{Variation}
159 \definecolor{orange}{rgb}{0.7,0.5,0.0}
160 \newcommand{\unit}[1]{\texttt{\small\color{orange}#1}}
161 %\newcommand{\unit}[1]{\textbf{\textsf{\color{orange}#1}}}
162 \usepackage[T1]{fontenc}
163 \usepackage{longtable}
164 \usepackage{multirow}
165 %\font\symbo=psyr at 10pt
166 %\def\micro{{\symbo \char109}}
167 \def\micro{{\ensuremath \mu}}
168
169 %Mir colors definitions
170 \newcommand{\bleu}[1]{\textcolor[rgb]{0.00,0.00,1.00}{#1}}
171 \newcommand{\blue}{\textcolor{blue}}
172 \newcommand{\violet}{\textcolor[rgb]{0.50,0.00,0.50}}
173 \newcommand{\brown}{\textcolor[rgb]{0.50,0.10,0.10}}
174 %%%%%%%%%%%%%%%%%
175
176 %\usepackage{showlabels}
177
178
179
180 \begin{document}
181 \maketitle % print header in standard form
182 \thispagestyle{empty}
183 \begingroup
184 %%\input{versions}
185 \vfill
186 %%\hbox to \textwidth{\hfil\tiny Volute: \SVNRevision, \SVNDate}
187 \hbox to \textwidth{\hfil\tiny code.google.com/p/volute, rev\SVNRevision, \SVNDate}
188 %\hbox to \textwidth{\hfil\tiny Volute: \SVNHeader}
189 \endgroup
190 \newpage
191 \tableofcontents
192 \newpage
193 \listoftables
194 \newpage
195 \section*{Abstract}
196 This document describes a recommended syntax for writing the string
197 representation of unit labels (`VOUnits'). In addition, it describes
198 a set of recognised and deprecated units, which is as far as possible
199 consistent with other relevant standards (BIPM, ISO/IEC and the IAU).
200
201 The intention is that units written to conform to this specification
202 will likely also be parsable by other well-known parsers. To this
203 end, we include machine-readable grammars for other units syntaxes.
204
205 \section*{Status of this document}
206
207 This is an IVOA Proposed Recommendation made available for public review.
208 It is appropriate to reference this document only as a recommended standard
209 that is under review and which may be changed before it is accepted as a full recommendation.
210
211 %This is an IVOA Working Draft for review by IVOA members and
212 %other interested parties. It is a draft document and may be updated,
213 %replaced, or rendered obsolete by other documents at any time. It is
214 %inappropriate to use IVOA Working Drafts as reference materials or to cite
215 %them as other than ``work in progress''.
216
217 This document is a substantial update of the previous version 0.2 that
218 was written within the Data Model IVOA Working Group. As decided in previous
219 IVOA interoperability meetings, the Semantics working group is now in charge
220 of the document. This document is intended to become a full IVOA recommendation,
221 following agreement within the community and the standard IVOA recommendation process.
222
223 The place for discussions related to this document is the
224 Semantics IVOA mailing list {\tt semantics\@@ivoa.net}.
225
226 A list of current IVOA recommendations and other technical documents can be found at
227 \url{http://www.ivoa.net/Documents/}.
228
229 \subsection*{Note on conformance}
230
231 Text within the following document is classified as either
232 `normative' or `informative'.
233
234 \textbf{Normative} text means information that is required
235 to implement the Recommendation; an implementation of this
236 Recommendation is conformant if it abides by all the prescriptions
237 contained in normative text. \textbf{Informative} text is
238 information provided to clarify or illustrate a requirement but which
239 is not required for conformance.
240
241 The sections and subsections of this Recommendation are labeled,
242 after the section heading, to specify whether they are normative or
243 informative. If a subsection is not labeled, it has the same
244 normativity as its parent section. References are normative if they
245 are referred to within normative text.
246
247 When found within normative sections, the key words
248 \norm{must},
249 \norm{must not},
250 \norm{required},
251 \norm{shall},
252 \norm{shall not},
253 \norm{should},
254 \norm{should not},
255 \norm{recommended},
256 \norm{may},
257 \norm{optional},
258 thus formatted, are to be interpreted as described in RFC 2119
259 \citep{std:rfc2119}.
260
261 \section*{Acknowledgements}
262
263 We thank all those participants in IVOA and EuroVO workshops who have
264 contributed by exposing use cases and providing comments, especially
265 Rick Hessman,
266 Paddy Leahy,
267 Jeff Lusted,
268 Jonathan McDowell,
269 Marco Molinaro,
270 Pedro Osuna,
271 Anita Richards,
272 Bruno Rino,
273 Arnold Rots,
274 Jesus Salgado,
275 Mark Taylor,
276 Brian Thomas
277 and recent contributors on the DM and Semantics forums.
278
279 \section{Introduction (informative)}
280 \label{sec:intro}
281
282 This document describes a standardised use of units in the VO
283 (hereafter simply `VOUnits'). It aims to describe a syntax for unit
284 strings which is as far as possible in the intersection of existing
285 syntaxes, and to list a set of `known units' which is
286 the union of the `known units' of those standards.
287 We \emph{recommend}, therefore, that applications which write out
288 units should do so using \emph{only} the VOUnits syntax, and that
289 applications reading units should be able to read \emph{at least} the
290 VOUnits syntax, plus all of the units of \prettyref{sec:knownunits}.
291 It is not, however, quite possible for VOUnits to be in the
292 intersection of existing syntaxes; there is further discussion of this
293 point in \prettyref{sec:deviations}.
294
295 We also provide, for information, a set of self- and mutually-consistent
296 machine-readable grammars for all of the syntaxes discussed.
297
298 The introduction gives the motivation for
299 this proposal in the context of the VO architecture, from the legacy
300 metadata available in the resource layer, to the requirements of the various
301 VO protocols and standards and applications.
302
303 This document is organised as follows. \prettyref{sec:proposal}
304 details the proposal for VOUnits. \prettyref{sec:useCase} lists some
305 use cases and reference implementations. In \prettyref{appx:current},
306 there is a brief review of current practices in the description and
307 usage of units; in \prettyref{appx:comparisons} there is a detailed
308 discussion of the differences between the various syntaxes; and
309 in \prettyref{appx:grammar} there are formal (yacc-style) grammars for
310 the four syntaxes discussed.
311
312 The normative content of this document is \prettyref{sec:proposal} and \prettyref{appx:vougrammar}.
313
314 \subsection{Units in the VO Architecture}
315
316 % Why are the default LaTeX float parameters so _irritatingly_ cautious?
317 \renewcommand{\topfraction}{.85}
318 \renewcommand{\bottomfraction}{.7}
319 \renewcommand{\textfraction}{.15}
320 \renewcommand{\floatpagefraction}{.66}
321
322 \begin{figure}%[htbp]
323 \centerline{\includegraphics[width=0.9\textwidth]{unitsInIVOA.pdf}}
324 \caption{Units is a core building block in the VO. Most parts of the
325 architecture rely on it: the User Layer with tools and clients, the
326 Resource Layer with data. Protocols, registry entries, and
327 data models also re-use these Units definitions.}
328 \label{fig:architecture}
329 \end{figure}
330
331 Generally, every quantity provided in astronomy has a unit attached to
332 its value or is unitless (\eg a ratio, or a numerical multiplier).
333
334 Units lie at the core of the VO architecture, as can be seen in \prettyref{fig:architecture}.
335 Most of the existing data and metadata collections accessible in the resource
336 layer have some legacy units, which are mandatory for any scientific use of
337 the corresponding data. Units can be embedded in data (\eg FITS headers) or be
338 implied by convention and/or (preferably) specified in metadata.
339
340 Units also appear in the VOTable format \citep{ochsenbein11}, through the use
341 of a {\tt unit} attribute that can be used in the {\tt FIELD}, {\tt PARAM} and {\tt INFO}
342 elements. Because of the widespread dependency of many other VO standards on VOTable,
343 these standards inherit a dependency on Units.
344
345 The Units also appear in many Data Models, through the use of dedicated elements in
346 the models and schemas.
347 At present, each VO standard either refers to some external reference document, or
348 provides explicit examples of the Units to be used in its scope, on a case-by-case
349 basis.
350
351 The registry records can also contain units, for the description of table metadata.
352 The definition of VO Data Access protocols uses units by specifying in which units the input
353 parameters have to be expressed, or by restricting the possible units in which some
354 output must be returned.
355
356 And last but not least, tools can interpret units, for example to display
357 heterogeneous data in a single diagram by applying conversions to a reference
358 unit on each axis.
359
360 \subsection{Adopted terms and notations\label{sec:notations}}
361
362 Discussions about units often suffer from misunderstandings arising from cultural
363 differences or ambiguities in the adopted vocabulary. For the sake of clarity, in this
364 document, the following concepts are used:
365
366 %\begin{itemize}
367 %\item
368 A \textbf{quantity} is the combination of a (numerical) {\em
369 value}, measured for a {\em concept} and expressed in terms of a given
370 {\em unit}; there may be other structure to a quantity, such as
371 uncertainty or even provenance.
372 In the VO context, the nature of the concept can be expressed with a UCD or a utype. This document does not address the full issue of
373 representing quantities, but focusses on the {\em unit} part.
374
375 %\item
376 A \textbf{unit} can be expressed in various forms: in natural language
377 (\eg \emph{metres per second squared}), with a combination of symbols
378 with typographic conventions (\eg m s$^{-2}$), or by a simplified text
379 label (\eg \unit{m.s-2}). VOUnit deals with the label form, which is
380 easier to standardize, parse and exchange. A VOUnit corresponds in the
381 most general case to a combination of several (possibly prefixed)
382 symbols with mathematical operations expressed in a controlled syntax.
383
384 A \textbf{unit} consists of a sequence of \textbf{unit components},
385 each of which represents a \textbf{base unit}, possibly modified by a
386 multiplicative \textbf{prefix} (of one or two characters), and raised
387 to an integer or rational power. The whole unit may (in some
388 syntaxes) be prefixed by a numerical \textbf{scale-factor}.
389
390 Each of the \textbf{base units} (for example, the metre) is
391 represented by a \textbf{base symbol} (for example \unit{m}). Each
392 syntax has a number of \textbf{known units}
393 (\prettyref{sec:knownunits}), for each one of which there is at least
394 one symbol which identifies only that unit.
395
396 A \textbf{symbol} is either a base symbol or a base symbol with a
397 scaling prefix.
398
399 For example, in the unit \unit{1.663e-1mm.s**-1}, the scale-factor
400 is $1.663\times10^{-1}$, the two unit-components are \texttt{mm}
401 and \texttt{s**-1}; the first symbol has base symbol \texttt{m} and
402 prefix \texttt{m} (for `milli'), and the second has base
403 symbol \texttt{s}, no prefix, and the power~$-1$.
404
405 %% Remark: some complex questions, more related to data modeling than to units, such as how a quantity
406 %% is associated to its measurement error, or how groups of coordinates are described, are not addressed in this
407 %% document. They can always be broken down, with appropriate modeling, into smaller bits to which VOUnits can
408 %% be applied.
409
410
411 \subsection{Purpose of this document}
412 \label{sec:purpose}
413
414 The purpose of this document is to provide a reference specification of how
415 to write VOUnits, in order to maximize interoperability within the VO;
416 the intention is that VOUnit strings should be reliably
417 parsable by humans \emph{and} computers, with a single interpretation.
418 This is broadly the case for the other existing
419 unit-string syntaxes, although there are some slight ambiguities in
420 the specifications of these syntaxes (cf \prettyref{appx:grammar}).
421 We therefore include a set of self- and mutually-consistent
422 machine-readable grammars for all of the syntaxes discussed.
423
424 The unit syntax(es) described here are intended to be human-readable,
425 to the extent that, for example, a string such as \unit{mm.s**-2} is
426 human-readable (without this restriction, we could easily define a
427 much more regular machine-to-machine grammar). Having an explicit
428 unit-string grammar means that data providers can write human-readable
429 strings in the confidence that the result will \emph{additionally} be
430 machine-readable in a reliable and checkable way. Or, where a string
431 is not fully machine readable (because a data provider needs to use a
432 custom unit such as \unit{'jupMass'}; see \prettyref{sec:quoting}), that the
433 string is at least partially machine readable, and that that partial
434 readability is non-ambiguous.
435
436 We aim not to reinvent the wheel, and to be as compliant as possible with
437 legacy metadata in major archives, and astronomers' habits.
438
439 In particular:
440 \begin{itemize}
441 \item We describe (\prettyref{appx:current}) a number of existing unit
442 syntaxes, and mention some ambiguities in their
443 definition. Application authors should expect to encounter each of
444 the syntaxes mentioned in this document (FITS, OGIP and CDS); all of
445 these are broadly endorsed by this specification.
446 \item In addition to the unit syntaxes described above, there are
447 multiple specifications of base and known units
448 (we refer, in particular, to
449 specifications from BIPM, ISO/IEC and the IAU);
450 %\citet{si-brochure,std:iec80000-13,iau12});
451 these are broadly, but not completely, mutually consistent.
452 \item Where there are some ambiguities in, or contradictions between,
453 these various specifications, we recommend that application authors should
454 resolve them as indicated in this specification.
455 \item This document defines a syntax, called `VOUnits', which is as
456 far as is feasible in the intersection of the three existing
457 syntaxes, and which we recommend that applications should use when
458 writing unit strings. This aim is not quite possible in fact, and
459 the extensions to it, and the mild deviations from it, are discussed
460 below in \prettyref{sec:proposal} and \prettyref{appx:grammar};
461 there is a summary of the various units
462 in \prettyref{tabx:knownunits}.
463 \end{itemize}
464
465 % Data providers are encouraged to follow the VOUnits specifications for expressing
466 % their metadata. And application developers can rely on these specifications in order
467 % to know what VOUnits they should expect to face.
468
469
470
471 \subsection{What this document will not do}
472 \label{sec:outofscope}
473
474 This Recommendation does \textbf{not} prescribe what units data
475 providers employ, except to the extent that we avoid giving a standard
476 interpretation for a unit in some cases (for example we do not
477 acknowledge the degree Celsius or the century as units). Since we do
478 not forbid `unrecognised' units, this need not restrict data providers.
479 Nor do we demand that a given quantity be expressed in a
480 unique way (\eg all distances in \unit{m}). So long as data is
481 labelled in a recognised system, a translation layer can be
482 provided. Data providers can customise the translation tools if
483 required. Depending on preference and the operations required, the
484 user may have a choice of units for his or her query and for the
485 result. In particular, the Recommendation does not require that only
486 recognised units are used. While it is obviously desirable for data
487 providers to use recognised and non-deprecated units where possible,
488 there are occasions when this is unnecessary or undesirable.
489
490 This Recommendation does not discuss \emph{quantities} at all. That
491 is, we do not discuss the combination of number and unit which refers
492 to a particular physical measurement, such as `2$\,\mathrm m\,\mathrm
493 s^{-1}$'. Though this might appear to be a trivial extension, it
494 raises questions of the representation of decimal numbers, the
495 representation of uncertainties, questions of unit conversion, and
496 other data-modelling imponderables which have in the past, possibly
497 surprisingly, generated a great deal of discussion within the
498 IVOA without, so far, a generally acceptable resolution.
499
500 This Recommendation describes only isolated units, and not arrays,
501 records or other combinations of units. Several VO protocols require
502 embedding complex objects into result tables, and give string
503 serializations for those: geometries in TAP results are the most
504 common example. This specification does not cover this situation,
505 although we hope that where individual unit strings are required in
506 such instances, their syntax will conform to, or include, this
507 specification by reference.
508
509 In general, this Recommendation is concerned almost exclusively with
510 the syntactic question of what is and is not a valid unit string,
511 leaving most questions of interpretation or enforcement to a higher layer in an
512 application stack. Specifically:
513 \begin{itemize}
514 \item The specification does not forbid `unknown' units. An
515 implementation of this specification should be able to recognise, and
516 communicate, that a unit is unknown, but it is not required to reject
517 a unit string on the grounds that it is unrecognised.
518
519 \item Similarly, although \prettyref{tabx:knownunits} forbids some
520 units from having SI prefixes, a VOUnit implementation should not
521 itself reject a unit string which incorrectly includes a prefix, but
522 should instead just make available the information that this has been
523 detected, and that it is deprecated.
524
525 \item The list of known units in \prettyref{sec:knownunits} is not
526 specific about the precise definitions of the units in question; for
527 example, it refers to the `second' without distinguishing between the
528 various possible definitions that the second may have. In a
529 particular context, a data provider may need to indicate which of a
530 number of possible definitions is being used in fact. That said, a
531 VOUnits processor must interpret the symbols
532 of \prettyref{tabx:knownunits} compatibly with the indicated units:
533 a \unit{m} is always a metre of one type or another, and may not be
534 interpreted as, for example, a minute.
535
536 \item This Recommendation does not specify how an application should
537 compare units for equivalence; for example, an application may or may
538 not wish to deem \unit{m/s} and \unit{km/s} to be `equivalent'.
539 This Recommendation, similarly, does not specify how to compare units
540 with scale-factors (cf \prettyref{sec:scaleFactors}).
541 \end{itemize}
542
543 \section{The VOUnits syntax (normative)\label{sec:proposal}}
544
545 The rules for VOUnits are defined in this section.
546 Various aspects are addressed:
547 \begin{itemize}
548 \item how the labels are encoded;
549 \item what base symbols are allowed and how they are spelled;
550 \item what prefixes are allowed and how they are used;
551 \item how symbols are combined.
552 \end{itemize}
553 A formal grammar summarizing these conventions is given
554 in \prettyref{appx:vougrammar}.
555
556 The text below is expected to be compatible with the prescriptions
557 of the SI standard \citep{si-brochure}, except where noted.
558
559 \subsection{String representation and encoding\label{sec:encoding}}
560
561 VOUnits may occur in legacy contexts, in which the presence of
562 non-ASCII characters may cause considerable technical inconvenience
563 (for example FITS cards). There are only a few non-ASCII characters
564 which we might wish to include in unit strings (for example \AA\
565 or \micro), and we can find substitutes for these sufficiently easily, that we
566 feel there is little real benefit in permitting non-ASCII characters
567 in VOUnit strings.
568
569 All the VOUnit characters in the specification below are printable ASCII
570 characters (that is, in the range hexadecimal 20 to 7E); any
571 extensions to this standard \norm{should} be restricted to this same range.
572
573 All VOUnit strings \norm{must} be regarded as case-sensitive (the
574 strings in the other syntaxes are also case-sensitive).
575
576 \subsection{Parsing unit strings -- overview\label{sec:parsing-components}}
577
578 The unit strings \unit{unknown} and \unit{UNKNOWN} (that is, in
579 all-lowercase or all-uppercase) are reserved for cases
580 when the unit is unknown; that is, it is known that there should be a
581 unit, but the unit string has been lost or not been specified. These
582 strings are not, however, part of the list of known units or the VOUnits grammar,
583 and applications \norm{must} check for their presence before unit parsing.
584
585 An empty unit string positively indicates that the corresponding
586 quantity is dimensionless. Since an empty string does not conform to
587 the grammars below, this also \norm{must} be checked for before
588 unit-parsing starts.
589
590 A \textbf{symbol} within a unit-component \norm{should} be parsed as follows:
591 \begin{enumerate}
592 \item If it corresponds to a known \textbf{base symbol}, then it
593 \norm{must} be recognised as such (for example the \unit{Pa} must be
594 parsed as the known pascal, and never as the peta-year).
595
596 \item If the symbol starts with a multiplicative prefix, then this is
597 recognised independently of whether the resulting base symbol is a
598 known or unknown unit -- thus \unit{Mm} and \unit{Mfurlong} are parsed
599 as millions of metres and furlongs, but note that this implies, for
600 the sake of consistency, that \unit{furlong} is parsed as the
601 femto-`urlong'.
602
603 \item In the VOUnits syntax (a significant divergence from the other
604 syntaxes), base symbols \norm{may} be put between single
605 quotes \unit{'...'} (ASCII character \hex{27}).
606 Such symbols \norm{must} be parsed as
607 unrecognised unit symbols which are not further examined.
608 See \prettyref{sec:quoting} for discussion.
609 \end{enumerate}
610
611 A library which implements this specification \norm{should} be able to
612 distinguish known and unknown units, and identify deviations from the
613 restrictions on their use, below. It \norm{should} be able to
614 communicate such information to a caller, but it \norm{should not}
615 unilaterally reject unit strings which use unknown units or use known
616 units in disapproved ways (of course, a higher-level application is
617 free to reject unit strings for any reason it pleases).
618
619 \subsection{Base units\label{sec:baseUnits}}
620
621 There is good agreement for the base symbols across the different schemes
622 (see \prettyref{tabx:comparUnitBase}).
623
624 The VOUnits base symbols are listed in \prettyref{tab:voubase}.
625
626 \begin{table}[ht]
627 \begin{center}
628 \def\arraystretch{1.2}
629 \begin{tabular}{|rl|rl|rl|rl|}\hline
630 \unit{m}&(metre) &\unit{g}&(gram) &\unit{J}&(joule) &\unit{Wb}&(weber)\\
631 \unit{s}&(second of time) &\unit{rad}&(radian) &\unit{W}&(watt) &\unit{T}&(tesla)\\
632 \unit{A}&(ampere) &\unit{sr}&(steradian) &\unit{C}&(coulomb) &\unit{H}&(henry)\\
633 \unit{K}&(kelvin) &\unit{Hz}&(hertz) &\unit{V}&(volt) &\unit{lm}&(lumen)\\
634 \unit{mol}&(mole) &\unit{N}&(newton) &\unit{S}&(siemens) &\unit{lx}&(lux)\\
635 \unit{cd}&(candela) &\unit{Pa}&(pascal) &\unit{F}&(farad) &\unit{Ohm}&(ohm)\\\hline
636 \end{tabular}
637 \end{center}
638 \caption{\label{tab:voubase}VOUnits base units}
639 \end{table}
640
641 For masses, the SI unit is \unit{kg}. However, existing specifications
642 recommend not using prefixes with \unit{kg}, but attaching them
643 only to \unit{g} instead.
644
645 Recognising a known unit takes priority over parsing for prefixes.
646 Thus the string \unit{Pa} represents the pascal, and not the
647 peta-year, and the string \unit{mol} will always be the mole, and
648 never a milli-`ol', for some unknown unit~`ol'.
649
650 \subsection{Known units}
651 \label{sec:knownunits}
652
653 In \prettyref{tabx:knownunits}, we indicate the `known units' for each of the
654 described syntaxes, which go beyond the physically motivated set of
655 base units.
656 There are a few units (namely `\unit{angstrom} or \unit{Angstrom}',
657 `\unit{pix} or \unit{pixel}', `\unit{ph} or \unit{photon}' and `\unit{a} or \unit{yr}') for
658 which there are recognised alternatives in some syntaxes, and in these
659 cases `p' marks the preferred one.
660
661 \emph{Unrecognised units \norm{should} be accepted by parsers},
662 as long as they are parsed giving preference to the syntaxes and
663 prefixes described here. Thus, for example, the
664 string \unit{furlong/week} \norm{should} parse successfully (though
665 perhaps with suitably prominent warnings) as the femto-`urlong' per
666 week.
667
668 The Unity library (\prettyref{sec:libraries}) recognises units with
669 respect to a subset of the QUDT unit framework~\citep{qudt}, with some
670 astronomy-specific additions. This is a particularly comprehensive
671 collection of units, and we commend it to the IVOA community as
672 a \emph{lingua franca} for this type of work.
673
674 Sections~\ref{sec:binary} to~\ref{sec:other} below, discussing the set
675 of known units, are longer than one might expect would be necessary.
676 Most of the discussion concerns rather arcane edge-cases, or attempts
677 to reconcile the minor deviations between the relevant existing
678 standards. In all cases, we have attempted to be as uninnovative and
679 unsurprising as possible.
680
681 Future versions of this specification may add to the set of known units.
682
683 \begin{table}
684 \hbox to \textwidth{\hss
685 \catcode`\%=11
686 \begin{tabular}{rlcccc|rlcccc}
687 \emph{unit}&\emph{description}&\emph{fits}&\emph{ogip}&\emph{cds}&\emph{vou}&
688 \emph{unit}&\emph{description}&\emph{fits}&\emph{ogip}&\emph{cds}&\emph{vou}\\
689 \input{known-units.tex}
690 \end{tabular}
691 \hss}
692 \caption[Known units in the various syntaxes]
693 {\label{tabx:knownunits}Known units in the various syntaxes.
694 In the table, and for a given syntax, a `$\cdot$' indicates that the unit is recognised,
695 an~`s' that it is additionally permitted to have SI prefixes,
696 a~`b' that binary prefixes will be recognised,
697 and a~`d' that it is recognised but deprecated.
698 For those units which have alternative symbols for a given unit,
699 a~`p' indicates the preferred one.}
700 \end{table}
701
702 \subsection{Binary units}
703 \label{sec:binary}
704
705 The symbol~`b' is sometimes used for `bits', but this is the SI symbol
706 for `barn', and this Recommendation aligns with the SI standard in
707 this respect. Since the same symbol is sometimes used for `bytes', it
708 is probably best avoided in any case.
709
710 \citet[item 13-9.c]{std:iec80000-13} notes that the term `byte'
711 `has been used for numbers of bits other than eight' in the past, but
712 that it should now always be used for eight-bit bytes; we recommend
713 the same interpretation here. The same source notes the theoretical
714 confusion between the symbol \unit{B} for `byte' and for `Bel'. We
715 believe it would be perverse in our present context to recommend
716 against using `B' for byte, and resolve this here
717 in favour of `byte' by mandating that \unit{B} \norm{must} be parsed
718 as indicating the `byte', that the \unit{dB} is an
719 unprefixable special-case unit (as discussed below), and by
720 implication that the `dB'
721 \norm{must not} be interpreted as a tenth of a byte.\footnote{We have no
722 evidence that this has been a common source of confusion within the
723 IVOA, or indeed anywhere else.}
724
725 \subsection{Scale factors\label{sec:scaleFactors}}
726
727 Units \norm{may} be prefixed by any of the 20 SI scale-factors,
728 and a subset \norm{may} be prefixed by the eight binary scale-factors.
729 The SI scale-factors -- provided in \prettyref{tab:vouscalefactors}a --
730 are the same as those of \citet{si-brochure},
731 of \citet[\S6.5.4]{std:iso80000-1},
732 and of \citet[Table~5]{pence10}
733 (see also \prettyref{tabx:comparUnitScale} for further comparisons).
734 %\medskip
735 \begin{table}
736 \def\arraystretch{1.2}
737 \begin{center}
738 \def\pfx#1#2{#1, $10^{#2}$}
739 \begin{tabular}{|rl|rl|}\hline
740 \unit{Y}&\pfx{yotta}{24}&
741 \unit{y}&\pfx{yocto}{-24}\\
742 \unit{Z}&\pfx{zetta}{21}&
743 \unit{z}&\pfx{zepto}{-21}\\
744 \unit{E}&\pfx{exa}{18}&
745 \unit{a}&\pfx{atto}{-18}\\
746 \unit{P}&\pfx{peta}{15}&
747 \unit{f}&\pfx{femto}{-15}\\
748 \unit{T}&\pfx{tera}{12}&
749 \unit{p}&\pfx{pico}{-12}\\
750 \unit{G}&\pfx{giga}{9}&
751 \unit{n}&\pfx{nano}{-9}\\
752 \unit{M}&\pfx{mega}{6}&
753 \unit{u}&\pfx{micro}{-6}\\
754 \unit{k}&\pfx{kilo}{3}&
755 \unit{m}&\pfx{milli}{-3}\\
756 \unit{h}&\pfx{hecto}{2}&
757 \unit{c}&\pfx{centi}{-2}\\
758 \unit{da}&\pfx{deca}{1}&
759 \unit{d}&\pfx{deci}{-1}\\
760 \hline
761 \end{tabular}
762 \qquad
763 \def\pfx#1#2{#1, $2^{#2}$}
764 \begin{tabular}{|rl|}\hline
765 \unit{Ki}&\pfx{kibi}{10}\\
766 \unit{Mi}&\pfx{mebi}{20}\\
767 \unit{Gi}&\pfx{gibi}{30}\\
768 \unit{Ti}&\pfx{tebi}{40}\\
769 \unit{Pi}&\pfx{pebi}{50}\\
770 \unit{Ei}&\pfx{exbi}{60}\\
771 \unit{Zi}&\pfx{zebi}{70}\\
772 \unit{Yi}&\pfx{yobi}{80}\\
773 \hline
774 \end{tabular}
775 \end{center}
776 \caption[VOUnits prefixes]{\label{tab:vouscalefactors}VOUnits prefixes:
777 (a, left) decimal prefixes;
778 (b, right) binary prefixes}
779 \end{table}
780
781 Writers of unit strings \norm{must not} use compound prefixes (that is,
782 more than one SI prefix). Prefixes are concatenated to the base
783 symbol without space, and \norm{must not} be used without a base symbol.
784
785 The SI prefixes of \prettyref{tab:vouscalefactors}a \emph{\norm{must}
786 always refer to multiples of 1000}, even when applied to binary units
787 such as bit or byte; this follows the stipulations (and clarifying note) of
788 \citet[\S3.1]{si-brochure}, and of \citet[\S6.5.4]{std:iso80000-1}.
789 If data providers wish to use multiples of 1024 (ie, $2^{10}$) for
790 units such as bytes or bits, they \norm{must} use the binary prefixes
791 of \citet[\S4]{std:iec80000-13}, reproduced in \prettyref{tab:vouscalefactors}b
792 (these originated in \citet{std:ieee1541-2002}).
793
794 Note: the~`s' and~`b' annotations in \prettyref{tabx:knownunits}
795 are not symmetric: the~`s' annotation indicates that SI
796 prefixes are permitted in the given syntax, which means that they are
797 also recognised when preceding unknown units (which have no
798 restrictions on them); in contrast, binary prefixes are recognised
799 exclusively on units with a~`b' annotation, which means that they
800 are \emph{not} recognised with unknown units. That is,
801 the \unit{Mifurlong} is the mega-\texttt{ifurlong} and
802 the \unit{Kifurlong} is the unknown unit \texttt{Kifurlong}.
803
804 Note: The letter \unit{u} is used instead of the
805 \micro\ symbol to represent a factor of $10^{-6}$,
806 following the character set defined in \prettyref{sec:encoding}.
807
808 \subsection{Astronomy symbols}
809
810 \prettyref{tabx:comparUnitAstro} lists symbols used in astronomy to
811 describe times, angles, distances and a few additional quantities.
812 The subset of these used by this specification is
813 listed in \prettyref{tab:vouadopted}.
814
815 \begin{table}[t]
816 \begin{center}
817 \def\arraystretch{1.2}
818 \begin{tabular}{|rl|rl|rl|}\hline
819 \unit{min}&(minute of time) &\unit{deg}&(degree of angle) &\unit{Jy}&(jansky) \\
820 \unit{h}&(hour of time) &\unit{arcmin}&(arcminute) &\unit{pc}&(parsec) \\
821 \unit{d}&(day) &\unit{arcsec}&(arcsecond) &\unit{eV}&(electron volt) \\
822 \unit{a}, \unit{yr}&(year) &\unit{mas}&(milliarcsecond) &\unit{AU}&(astronomical\\
823 \unit{u}&(atomic mass) & & & & unit)\\
824 \hline
825 \end{tabular}
826 \end{center}
827 \caption{\label{tab:vouadopted}Additional astronomy symbols}
828 \end{table}
829
830
831 Minutes, hours, and days of time \norm{must} be represented in VOUnits by the
832 symbols \unit{min}, \unit{h} and \unit{d}; however the \unit{cd} is
833 the candela, not the centi-day.\footnote{We therefore rule out
834 interpreting \units*{dB/cd} as 0.9\units{mbit/s}.} The year \norm{may} be expressed by
835 \unit{yr} (common practice),
836 or \unit{a},
837 as recommended by ISO \citep[Annex C]{std:iso80000-3}
838 and the IAU \citep[Table 6]{wilkins89}.
839 However peta-year must only be written \unit{Pyr},
840 to avoid the collision with the pascal, \unit{Pa}.
841
842 There are no VOUnit symbols for degrees celsius or century.
843 Temperatures are expressed in kelvin (\unit{K}),
844 and a century corresponds to \unit{ha} or \unit{hyr}.
845 Note that \emph{this is a mild deviation from the SI standard},
846 which states that the `hectare', with unit symbol \unit{ha},
847 is a `non-SI unit accepted for use' as a measure of land area~\citep[table~6]{si-brochure},
848 and which acknowledges neither `a' nor `yr' as a symbol for year.\footnote{If
849 large telescope arrays feel they must talk of attojoules per
850 hectare per century, for some reason, they're going to have to be
851 careful how they do so; it's probably best not to even think about atto-Henrys.}
852
853 The astronomical unit \norm{should} be expressed in upper-case, \unit{AU}, in
854 order to follow legacy practice. It may also be written \unit{au}, in
855 the VOUnits syntax, on the ground that it would be perverse to prefer
856 the atto-atomic-mass to the astronomical unit, in an astronomical unit
857 specification.
858 \emph{This is a deviation} from the SI recommendation of
859 `ua'~\citep[Table 7]{si-brochure}, but conformant with the IAU's
860 recommendation of `au'~\citep{iau12}.%
861 \footnote{If you feel a burning desire to write about micro-years or
862 atto atomic-mass, this document is not the place you need to look
863 for help.}
864
865 Because of the near-degeneracy between the decimal prefixes \texttt{d}
866 and \texttt{da}, there is an ambiguity when parsing the
867 unit \unit{dadu} -- is this the deca-\unit{du} or the deci-\unit{adu}?
868 The only cases where this ambiguity is possible are those involving
869 known units starting with~`a' (\texttt{da} is unambiguously a
870 deci-year for the same reason that \texttt{d} is unambiguously a day,
871 because the presence of a bare unit prefix would be ungrammatical).
872 We can think of no cases where the prefix is useful enough that
873 resolving the ambiguity is worth the specification effort, so we deem
874 the parse of \texttt{da.*} to be \textbf{unspecified}. %\footnote{The
875 %% Working Group was informed at the specification stage that the
876 %% dekagramme is still in use in parts of Europe for certain categories
877 %% of delicious comestibles; this was deemed insufficient to save the
878 %% prefix, since grocery shopping is not a core VOUnit use-case.}
879 In consequence, data providers \norm{must not} use the \texttt{da}
880 prefix, and \norm{should not} use the \texttt d prefix (as noted
881 in \prettyref{sec:other}, the decibel, \unit{dB}, is listed as a `known
882 unit', as opposed to a deci-Bel).
883
884 \subsection{Other symbols, and other remarks}
885 \label{sec:other}
886
887 \prettyref{tabx:comparUnitDeprecated} corresponds to Table~7 in the IAU document, and the IAU strongly
888 recommends no longer using these units.
889 Data producers are strongly advised to prefer the equivalent notation using symbols and prefixes listed in
890 Tables~\ref{tabx:comparUnitBase}, \ref{tabx:comparUnitScale} and \ref{tabx:comparUnitAstro}.
891
892 However, in order to be compatible with legacy metadata, VOUnit
893 parsers \norm{should} be able to interpret symbols \unit{angstrom}
894 or \unit{Angstrom} (for \aa{}ngstr\"om), \unit{barn}, \unit{erg}
895 and \unit{G} (for gauss).
896
897 \prettyref{tabx:comparUnitOther} compares other miscellaneous symbols.
898 The last set of VOUnits symbols, derived from this comparison, is in
899 \prettyref{tab:voumisc}.
900
901 %\medskip
902 \begin{table}[ht]
903 \begin{center}
904 \def\arraystretch{1.2}
905 \begin{tabular}{|l|l|L{3cm}|l|}\hline
906 \unit{mag} (magnitude) &\unit{pix} or \unit{pixel} &\unit{solMass} (solar mass) &\unit{R} (rayleigh) \\
907 \unit{Ry} (rydberg) &\unit{voxel} &\unit{solLum} (solar luminosity)&\unit{chan} (channel) \\
908 \unit{lyr} (light year) &\unit{bit} &\unit{solRad} (solar radius) &\unit{bin} \\
909 \unit{ct} or \unit{count} &\unit{byte} (8 bits) &\unit{Sun} (relative to the Sun, e.g. abundances)&\unit{beam} \\
910 \unit{ph} or \unit{photon} &\unit{adu} &\unit{D} (Debye) &\unit{unknown} (\prettyref{sec:parsing-components})\\\hline
911 \end{tabular}
912 \end{center}
913 \caption[Miscellaneous VOUnits]
914 {\label{tab:voumisc}Miscellaneous VOUnits.}
915 \end{table}
916
917 A few symbols which might theoretically be ambiguous are listed in
918 \prettyref{tab:ambiguous},
919 with their consensus VOUnit interpretation.
920
921 \begin{table}[bht]
922 \begin{center}
923 \begin{tabular}{|r|l|l|}
924 \hline
925 \textbf{VOUnit}&\textbf{Correct interpretation}&\textbf{Incorrect}\\
926 \unit{Pa}&pascal&peta-year\\
927 \unit{ha}&hecto-year&hectare\\
928 \unit{cd}&candela&centi-day\\
929 \unit{dB}&decibel&deci-byte\\
930 \unit{B}&byte&bel\\
931 \unit{au}&astronomical unit&atto-atomic-mass\\
932 \hline
933 \end{tabular}
934 \end{center}
935 \caption{\label{tab:ambiguous}Possibly ambiguous units}
936 \end{table}
937
938 It can be noted that some of the units listed in \prettyref{tabx:comparUnitOther} are
939 questionable. They arise in fact from a need to describe quantities, when the only
940 piece of metadata available is the unit label. Count, photon, pixel, bin, voxel, bit,
941 byte are concepts, just as apple or banana. The associated quantities could be fully
942 described with a UCD, a value and a void unit label.
943 It is possible to count a number of bananas, or to express a distance measured in
944 bananas, but this does not make a banana a reference unit.
945
946 The FITS document provides the most general description of all the compared schemes,
947 and VOUnits adopts similar definitions, for the sake of legacy metadata.
948 The VOUnits symbol for magnitudes is \unit{mag}.
949 %% The symbol \unit{Sun} is used to express ratios relative to solar
950 %% values, for example abundances or metallicities.
951 Note that all symbols like \unit{count}, \unit{photon}, \unit{pixel}
952 are always used in lower case and singular form.
953
954 The decibel, \unit{dB}, is listed in the SI specification
955 \citep[Table 8]{si-brochure} amongst a set of `other non-SI units',
956 and mentioned by \citet[\S0.5]{std:iso80000-3} in a `Remark on
957 logarithmic quantities'. The \unit{dB} is included in the list of
958 `known units' of \prettyref{tabx:knownunits} and so \norm{must} be parsed as a
959 unit by itself -- as opposed to being parsed as the prefix~`d'
960 qualifying the unit `Bel' -- and both the decibel and Bel \norm{must
961 not} be used with other scaling prefixes.
962
963 If there is no unit associated with a quantity (for example a quantity
964 that is a character string, or unitless), data providers \norm{should}
965 indicate this with an empty string rather than blanks or dashes.
966
967
968
969 \subsection{Mathematical expressions containing symbols}
970
971 \prettyref{tabx:comparUnitCombine} summarizes how,
972 in the various existing syntaxes, mathematical operations may
973 be applied on unit symbols for exponentiation, multiplication,
974 division, and other computations.
975
976 The combination rules are where the largest discrepancies between the
977 different schemes appear. The FITS document discusses the problem of
978 trying to best accommodate the existing schemes
979 \citep[\S4.3.1]{pence10}, without really resolving the problem.
980 \label{sec:fitsquote}
981 This and other ambiguities are discussed in the detailed syntaxes of \prettyref{appx:grammar}.
982
983 VOUnits follow a subset of the FITS rules,
984 as summarized in \prettyref{tab:VOUnitCombine}.
985
986 \begin{table}%[ht]
987 \begin{center}
988 \def\arraystretch{1.2}
989 \begin{tabular}{|r|l|}
990 \hline
991 %\unit{str1 str2} & Multiplication (discouraged -- see text)\\
992 %\unit{str1*str2} & Multiplication \\
993 \unit{str1.str2} & Multiplication \\
994 \unit{str1/str2} & Division \\
995 \unit{str1**expr} & Raised to the power expr \\
996 %\unit{str1\^{}expr} & Raised to the power expr \\
997 %\unit{str1expr} & Raised to the power expr \\
998 \unit{fn(str1)} & Function applied to a unit string\\
999 %% \unit{log(str1)} & Common Logarithm (to base 10) \\
1000 %% \unit{ln(str1)} & Natural Logarithm \\
1001 %% \unit{exp(str1)} & Exponential (e$^\mathrm{str1}$) \\
1002 %% \unit{sqrt(str1)} & Square root \\
1003 \hline
1004 \end{tabular}
1005 \end{center}
1006 \caption[Combination rules and mathematical expressions for VOUnits]
1007 {\label{tab:VOUnitCombine}Combination rules and mathematical expressions for VOUnits.
1008 See \prettyref{appx:vougrammar} for the complete grammar.}
1009 \end{table}
1010
1011 As illustrated in \prettyref{tab:VOUnitCombine}, units may include a
1012 limited set of functional dependencies on other units. The set of
1013 functions recognised within VOUnits is the same as the set recommended
1014 by FITS, and listed in \prettyref{tab:functions}. As with
1015 unrecognised units,
1016 \emph{parsers \norm{should} accept unrecognised functions without error},
1017 even if they deprecate them at some later processing stage. As
1018 described in \prettyref{sec:quoting}, functions may be quoted to
1019 indicate that they \norm{must not} be interpreted as in this table.
1020 \begin{table}%[ht]
1021 \begin{center}
1022 \def\arraystretch{1.2}
1023 \begin{tabular}{|r|l|}
1024 \hline
1025 \unit{log(str1)} & Common Logarithm (to base 10) \\
1026 \unit{ln(str1)} & Natural Logarithm \\
1027 \unit{exp(str1)} & Exponential (e$^{\mathrm{str1}}$) \\
1028 \unit{sqrt(str1)} & Square root \\
1029 \hline
1030 \end{tabular}
1031 \end{center}
1032 \caption{\label{tab:functions}Functions of units.}
1033 \end{table}
1034 Note that since functions such as `log' require dimensionless
1035 arguments, when a quantity~$x$ is (for example) represented by numbers
1036 labelled with units \unit{log(Hz)}, that indicates that the numbers
1037 are related to~$x$ by the function
1038 $\log\bigl(x/(\mathrm{1\,Hz})\bigr)$.
1039
1040 %\subsection{Quantities}
1041 %\label{sec:quantities}
1042 %
1043 %A quantity, \eg a measurement of a physical value like the speed of
1044 %light, has a value (2.998 10+5), a ucd (phys.veloc), units (km.s-1)
1045 %and is coded using a numerical type (real).
1046 %
1047 %Some quantities are also reused as units. Many units are expressed, or
1048 %converted, in terms of physical constants such as the speed of light,
1049 %\begin{itemize}
1050 % \item \texttt{$c=$~2.998 10+8~m.s-1;}
1051 % \item Boltzman's constant, \texttt{$K_{\mathrm{B}}=$1.38065~10-23}
1052 % \item \texttt{1 AU $=$ 1.499 10+11 m.}
1053 %\end{itemize}
1054 %
1055 % Many of these are used as units in their own right, \eg velocities may be expressed as a
1056 %fraction or multiple of c, but c is also used to convert between
1057 %wavelength and frequency, etc. These are combinations of units with
1058 %scaling factors applied, and so can be treated in the same way as any
1059 %other compound unit \eg the \texttt{Jy} (\texttt{10-26 W.m-2.Hz-1}) .
1060 %
1061 %We need to ensure that we are consistent with the IVOA Quantity model,
1062 %where appropriate.
1063
1064 \subsection{The numerical scale-factor}
1065 \label{sec:scalefactor}
1066
1067 A VOUnits unit string \norm{may} start with a numerical scale-factor
1068 to indicate a derived unit. For example, the inch might appear as the
1069 unit of \unit{25.4mm}. See \prettyref{appx:vougrammar} for the syntax
1070 of the VOUnits numerical string.
1071
1072 A data provider may choose to use such a unit in order to represent a
1073 unit which is not listed as one of the VOUnit `known units'. For
1074 example, given a VOTable column of masses relative to Jupiter's mass,
1075 one might label it as having units of \unit{1.898E27kg} rather than
1076 \unit{'jupiterMass'} (an `unknown unit').
1077 The \emph{advantage} of doing so is that the data consumer can
1078 translate the column data into well-known physical units without further
1079 information, and the data source is thus self-contained.
1080 The \emph{disadvantage} of doing so is (i) that the intention might be
1081 obscured (this is a type of provenance information);
1082 and (ii) that the measurements may be relative to the actual
1083 Jupiter mass rather than merely expressed in those terms, so that they
1084 should change if the actual mass were to be refined as a result of
1085 a recalibration. The data provider retains the choice of which
1086 strategy to take.
1087
1088 This Recommendation does not prescribe how many significant figures
1089 should be in a scale-factor, nor whether it should be interpreted as
1090 single- or double-precision, nor how units with scale-factors should
1091 be compared for equality. All of these are implementation choices for
1092 the software which is handling the units.
1093
1094 \subsection{Quoting unknown units\label{sec:quoting}}
1095
1096 The VOUnits syntax permits the use of `unknown units' (that is, units not listed
1097 in \prettyref{tabx:knownunits}). There need be no syntactic indication that
1098 a unit is `unknown'; this is convenient, but creates some minor
1099 ambiguities.
1100
1101 In the VOUnits syntax, base symbols may be put between single
1102 quotes \unit{'...'} (a significant divergence from the other
1103 syntaxes). Such symbols \norm{must} be parsed as
1104 unrecognised unit symbols which are not further examined.
1105
1106 This has two consequences. Firstly, it means that an unknown symbol
1107 which happens to start with an SI prefix is not broken
1108 into a base symbol and prefix: thus \unit{'furlong'} is parsed as
1109 expected, whereas \unit{furlong} would be the femto-`urlong'.
1110 Secondly, a quoted symbol is parsed as an unrecognised unit, even if
1111 it would otherwise indicate a known unit; thus the unit \unit{'m'} is
1112 parsed as an unknown unit `m', and does not indicate the metre.
1113
1114 This facility means that a data provider may label data with units of,
1115 for example, \unit{'martianDay'} or the \unit{'B'}, while still
1116 remaining conformant with the VOUnits Recommendation, and without
1117 risking the leading \texttt{m} being misparsed as an SI prefix, or the
1118 `B' being misparsed as a `byte'.
1119
1120 Quoted units can take prefixes (they are `unknown units', so there are
1121 no restrictions on their usage), so that \unit{m'furlong'} is a
1122 milli-furlong, and \unit{m'm'} is a milli-`m'. The only permissible
1123 prefixes are those of \prettyref{tab:vouscalefactors}.
1124
1125 \subsection{General rationale (informative)}
1126 \label{sec:rationale}
1127
1128 \subsubsection{Deviations from other syntaxes}
1129 \label{sec:deviations}
1130
1131 The aspiration of the VOUnits work was that the syntax should be as
1132 much as possible in the intersection of the various pre-existing
1133 syntaxes, so that a unit string which conformed to the VOUnits syntax
1134 would be parsable in each of those other syntaxes. This has not been
1135 possible in fact, for four reasons.
1136 \begin{enumerate}
1137 \item The CDS syntax permits only a dot to indicate a product, and the
1138 OGIP syntax only a star, while FITS permits both. The VOUnits syntax
1139 uses a dot, so that non-trivial OGIP unit strings are therefore
1140 necessarily invalid VOUnits strings in this one respect.
1141 \item The VOUnits syntax permits (but does not require) a scale-factor
1142 at the beginning of the string, which need not be a power of 10. Only the
1143 CDS syntax permits a similar factor.
1144 See \prettyref{sec:scalefactor} for discussion.
1145 \item Only the VOUnits syntax permits quoted units.
1146 \item Only the VOUnits syntax permits the use of the binary prefixes
1147 of \prettyref{tab:vouscalefactors}.
1148 \end{enumerate}
1149 The first is both unavoidable in specification, and largely
1150 unavoidable in practice; the others are VOUnit extensions which a data
1151 provider may of course decline to take advantage of.
1152
1153 The scale-factor and quoted-units extensions are intended to support
1154 the case where the data provider wishes to distribute data including a
1155 unit which is `unknown', but which the provider nonetheless feels is
1156 necessary or useful; this should be done only after weighing the
1157 considerations of Sects.~\ref{sec:scalefactor} and~\ref{sec:quoting}.
1158 For the sake of consistency, and in order to allow
1159 constructions such as \texttt{M'jupiterMass'}, the grammar permits quoted
1160 units to take scaling prefixes; this is not often likely to be a good idea.
1161
1162 A VOUnits string which avoids the three extensions above will be
1163 parsable, with the same meaning, in the CDS and FITS syntaxes, and
1164 will be parsable by an OGIP parser if dots are replaced by stars.
1165
1166 \subsubsection{Restrictions to ASCII}
1167
1168 As described above, VOUnit unit strings are restricted to printable
1169 ASCII characters. While the two most prominent uses of these strings
1170 will be within VOTable attributes (\verb|unit="..."|) and in XML
1171 serialisations of a data model (for example \verb|<unit>...</unit>|),
1172 we also intend them to be usable within FITS files and within
1173 databases. Neither of the latter two contexts is necessarily
1174 unicode-friendly, so permitting non-ASCII characters in a unit string
1175 (such as \AA\ or $\mu$) is more likely than not to cause trouble.
1176
1177 Similarly, forbidding spaces within VOUnit strings removes one (minor)
1178 complication when recognising them in use.
1179
1180 \subsubsection{Other units, and unit-like expressions}
1181
1182 As noted above, the VOUnits syntax does not include structures such as
1183 arrays or tuples of numbers. We include in this category sexagesimal
1184 coordinates, calendar dates (in ISO-8601 form or otherwise),
1185 RA-Dec pairs, and other structured quantities serialised as strings.
1186 Each of these is well-specified elsewhere, and would require a
1187 separate parser if encountered in data.
1188
1189 Existing VO standards already recommend that coordinates be expressed
1190 in decimal degrees.
1191
1192 Quantities like the Modified Julian Date (MJD) are also not recognized
1193 VOUnits. As described in \prettyref{sec:notations}, the quantity MJD
1194 can be seen as a concept (described by the appropriate UCD or utype),
1195 and the corresponding value will most likely be expressed in days, so
1196 the VOUnit will be \unit{d}. There is no need to overload VOUnits to
1197 incorporate the description of concepts themselves.
1198
1199 The notion of unit conversion and quantity manipulation is discussed in
1200 \prettyref{sec:conversion}.
1201
1202 \section{Use cases and applications (informative)\label{sec:useCase}}
1203
1204 \subsection{Unit parsing}
1205
1206 The rules defined in \prettyref{sec:proposal} allow us to build VOUnit parsers.
1207 Several services can be built on top of a VOUnit parser:
1208
1209 \begin{enumerate}
1210 \item Validation. A service checking that a VOUnit is well written. The output
1211 of such a service can have different levels: fully valid unit; valid syntax, but
1212 not the preferred one (\eg use of deprecated symbols); parsing error.
1213 \item Explanation. A service returning a plain-text explanation of the unit label.
1214 \item Typesetting. A service returning an equivalent of the unit label suitable for inclusion in
1215 a \LaTeX\ or HTML document.
1216 \item Dimensional equation. As described by \citet{osuna05}, VOUnits can be translated
1217 into a dimensional equation, allowing conversion methods to be built from one string
1218 representation to another (see also \prettyref{sec:conversion}).
1219 \end{enumerate}
1220
1221 \subsection{Libraries\label{sec:libraries}}
1222
1223 There are a few existing libraries able to interpret unit labels.
1224 In all cases,
1225 some software effort is required if they are to be used in translating
1226 between data provider unit labels, and those to be adopted by
1227 the IVOA for internal use.
1228
1229 One of the most widely-used specialised
1230 astronomical libraries is AST, which includes a unit conversion
1231 facility attached to astronomical coordinate systems \citep{berry12}.
1232
1233 Another library has been developed at
1234 CDS\footnote{\url{http://cds.u-strasbg.fr/resources/doku.php?id=units}},
1235 and can be tested online\footnote{\url{http://cdsweb.u-strasbg.fr/cgi-bin/Unit}}. This library covers all
1236 the symbols and notations defined in the standard for astronomical catalogues \citep[\S3.2]{cds00}, as well as
1237 additional symbols and notations.
1238
1239 The Unity library\footnote{\url{https://bitbucket.org/nxg/unity}} is a new
1240 standalone library intended to parse unit strings in the VOUnits,
1241 OGIP, StdCats and FITS syntaxes; it was used as a vehicle for
1242 developing and testing the grammars and
1243 ideas for this present document. It provides yacc-style grammars for
1244 the various syntaxes, as well as implementing them in parsers written
1245 in Java and~C. The grammars of \prettyref{appx:grammar} are extracted
1246 from the Unity distribution.
1247
1248 \subsection{Unit conversion and quantity transformation\label{sec:conversion}}
1249
1250 Unit conversion is the simple task of converting a quantity expressed
1251 in a given unit into a different unit, while the concept remains the
1252 same. For example, such a library might be able to convert a distance
1253 in \unit{pc} into a distance in \unit{AU} or \unit{km}, or convert a
1254 flux from \unit{mJy} to \unit{W.m-2.Hz-1}. This is rather easy with
1255 existing libraries, using dimensional analysis or SI units as a
1256 reference.
1257
1258 Quantity transformation consists in deriving a new quantity from one or several original
1259 quantities. It is more complex, because it requires having a precise model
1260 (a simple equation in simple cases) for computing the transformation. The model involves
1261 quantities, each described with a UCD or utype, value and VOUnit. Some of the quantities
1262 involved might be physical constants (\eg Boltzmann's constant $k_{\mathrm{B}}$).
1263
1264 Examples of such transformations can be:
1265 \begin{itemize}
1266 \item linear unit conversion: a distance is measured in \unit{pixel} in an image, and needs to be transformed into
1267 the corresponding angular separation in \unit{arcsec}. This can be done if the quantity representing the pixel
1268 scale is given, with its value and a compatible unit like \unit{deg/pixel}.
1269 \item converting a photon wavelength into the corresponding photon energy or frequency.
1270 \item deriving the flux for a given photon emission rate (in \units* W) from Planck's
1271 constant ($6.63 \times 10^{-34}\units{J~s}$), the radiation frequency (in \units*{GHz}), and the
1272 number of photons emitted per second.
1273 \item transforming a magnitude into a flux, as needed for SED building.
1274 \end{itemize}
1275
1276 VOUnits can help in quantity transformation if all quantities are qualified with proper VOUnits.
1277
1278 \subsection{Query languages}
1279
1280 Including VOUnits in queries is not an easy task. Some guidelines were
1281 articulated during the development of the ADQL standard.
1282
1283 \begin{enumerate}
1284 \item All data providers should be encouraged to supply units for each column
1285 of a table. Columns should also have associated UCDs, so that quantities can be
1286 properly identified.
1287
1288 %In most published tables in Astronomical journals and Vizier server as well, unitless values are
1289 %represented by "---". This could be adopted for the VO convention as well.
1290 \item The IVOA needs to provide a parser to relate the native units to the standard IVOA
1291 labels (in this context, the `native units' are the units of the
1292 underlying database table or metadata).
1293
1294 \item
1295 The default response to a query which does not specify units, will be
1296 in the native units of the table.
1297 %\emph{We recommand that the output units will be labelled using the IVOA standard label ???}
1298
1299 \item
1300 Where queries involve combining or otherwise operating on the content
1301 of columns to produce an output column with modified units, we can
1302 provide libraries and a parser to assist in assigning and checking a
1303 new unit, and attach this to the returned values via the SQL CAST
1304 operator.
1305 This is implemented already in database related applications such as
1306 Saada\footnote{\url{http://saada.unistra.fr/}}, for instance.
1307 If any column used in responding to a query lacks a necessary unit, the output
1308 involving that column will be unitless.
1309
1310 \item
1311 If the user wants to change the output units with respect to the table
1312 units, this could be done by specifying the units in the initial
1313 SELECT statement. There are several issues to consider:
1314 \begin{enumerate}
1315 \item Does the user also need to include the conversion expression, or does the unit
1316 parser take care of that?
1317 \item Can the user use this to assign units (based on prior knowledge) to output from a
1318 column lacking a unit?
1319 \end{enumerate}
1320 \end{enumerate}
1321
1322
1323 \subsection{Broader use in the VO}
1324
1325 \begin{figure}[thb]
1326 % Requires \usepackage{graphicx}
1327 \includegraphics[width=\textwidth]{./units2.jpg}
1328 \caption{This shows the levels at which conversions might be done.
1329 \textcolor{blue}{Plain arrows}: At the point where an astronomer or
1330 data provider submits input to the VO, we should provide tools to
1331 ensure that units are labeled consistently according to VOUnits.
1332 This implies that a units parsing step is included prior to metadata ingestion into the VO.
1333 \brown{Dashed arrows}: Conversions required to supply results to
1334 the user in specified or user-preferred units \eg \texttt{J.s-1} to \texttt{W}, are done where and when they are required.}
1335 \label{fig:units2}
1336 \end{figure}
1337
1338 Different VO entities, such as registries and applications, require and consume metadata with units attached,
1339 and interoperate via protocols. \prettyref{fig:units2} illustrates the places where the IVOA
1340 could intervene to ensure consistent use of units.
1341
1342
1343 \clearpage
1344 % Put a \clearpage before each appendix -- these are fairly distinct,
1345 % and users may well want to refer to individual ones, so make it easy
1346 % to jump between them.
1347
1348 \appendix
1349
1350 \section{Current use of units (informative)}
1351 \label{appx:current}
1352
1353 Many other projects have already produced lists of preferred
1354 representations of units. Those most commonly used in
1355 astronomy are described in this section.
1356
1357 The first four schemes described below are used as references for the
1358 comparison tables presented later in this document.
1359
1360 \subsection{IAU 1989\label{appx:IAU}}
1361
1362 In section 5.1 of its Style Manual, the IAU gives a set
1363 of recommendations for representing units in publications \citep{wilkins89}. This document
1364 therefore provides useful reference guidelines, but is not directly
1365 applicable to VOUnits because the recommendations are more intended
1366 for correct typesetting in journals than for standardized metadata exchange.
1367 The IAU style will be summarized in the second column of the comparison tables.
1368
1369 \subsection{OGIP 1993}
1370
1371 NASA has defined a list of character strings specifying the basic physical units
1372 used within OGIP (Office of Guest Investigator Programs) FITS files \citep{george95}. Rules and guidelines on the construction
1373 of compound units are also outlined.
1374
1375 HEASARC datasets follow these conventions, presented in the third column
1376 of the comparison tables.
1377
1378 \subsection{Standards for astronomical catalogues}
1379
1380 The conventions adopted at CDS are summarized in the Standards for Astronomical
1381 Catalogues, Version 2.0 \citep[\S3.2]{cds00}. They are presented in the fourth column
1382 of the comparison tables.
1383
1384 \subsection{FITS 2010}
1385
1386 In Section 4.3 of the reference FITS paper, \citet{pence10} describe how unit strings are to be expressed in
1387 FITS files. The recommendations are presented in the fifth column
1388 of the comparison tables.
1389
1390 \subsection{Other usages}
1391
1392 \begin{description}
1393 \item[\url{http://arxiv.org/pdf/astro-ph/0511616}]
1394 Dimensional Analysis applied to spectrum handling in VO context~\citep{osuna05}
1395 offers a mathematical framework to guess and recompute
1396 SI units for any quantity in astronomy.
1397
1398 \item[\url{http://unitsml.nist.gov}]
1399 The NIST (National Institute of Standards \& Technology) project
1400 UnitsML builds up an XML representation of units at the granularity
1401 level of a simple symbol string.
1402
1403 \item[\url{https://www.jcp.org/en/jsr/detail?id=275}]
1404 JAVA JSR-275 specifies Java packages for the programmatic
1405 handling of physical quantities and their expression as numbers of
1406 units.
1407 \item[\texttt{aips++} and \texttt{casacore}]
1408 These systems (see \url{http://aips2.nrao.edu/docs/aips++.html} and
1409 \url{http://code.google.com/p/casacore/}) contain modules handling
1410 units and quantities with high precision. The packages are mainly in use for
1411 radio astronomy but are designed to be modular and adaptable (NB:
1412 contrary to the statement on the casacore link, aips++ is still very much in
1413 use as the toolkit behind the \textsc{casa} package).
1414 %\item IAU SOFA
1415 %\violet{\footnotesize{\url{http://www.iau-sofa.rl.ac.uk/}}} and\\
1416 %USNO NOVAS
1417 %\violet{\footnotesize{\url{http://aa.usno.navy.mil/software/novas/novas_info.php}}}\\
1418 %implement the IAU 2000 recommendations.
1419 \end{description}
1420
1421 \clearpage
1422 \section{History: Comparison of syntaxes (informative)\label{appx:comparisons}}
1423 %\section{History: Comparison of unit-string schemes (informative)\label{appx:comparisons}}
1424
1425 In this section, we compare the existing unit-string syntaxes and the
1426 proposed standard. We have included these comparisons for
1427 more-or-less historical reasons, to try to highlight the variations
1428 between syntaxes, and so illustrate the motivation for this
1429 Recommendation, namely that the current practice, though it may at
1430 first appear to have rough consensus, is disturbingly heterogeneous.
1431
1432 \begin{table}[ht]
1433 \begin{tabular}{|L{0.2\linewidth}|p{0.12\linewidth}|p{0.12\linewidth}|p{0.12\linewidth}|p{0.12\linewidth}|p{0.12\linewidth}|}
1434 \hline
1435 & IAU & OGIP & StdCats & FITS & VOUnits\\\hline
1436 Units are strings of chars & & YES & & YES & YES\\\hline
1437 Case sensitive & YES & YES & YES & YES & YES\\\hline
1438 Character set & & & No spaces & ASCII text & ASCII printable\\\hline
1439 \end{tabular}
1440 \caption{Comparison of string representation and encoding.}
1441 \label{tabx:comparUnitEncoding}
1442 \end{table}
1443
1444 \begin{table}[ht]
1445 \begin{tabular}{|L{0.2\linewidth}|L{0.12\linewidth}|L{0.12\linewidth}|L{0.12\linewidth}|L{0.12\linewidth}|L{0.12\linewidth}|}
1446 \hline
1447 & IAU & OGIP & StdCats & FITS & VOUnits\\\hline
1448 The 6+1 base & \multicolumn{5}{c|}{\unit{m, s, A, K, mol, cd}} \\
1449 \cline{2-6}
1450 SI units (use \unit{s}, not sec, for seconds) & (1) & \unit{kg} & \unit{g} & \unit{kg}, but \unit{g} allowed & \unit{g}\\
1451 \hline
1452 Dimensionless planar and solid angle
1453 & \multicolumn{3}{c|}{\unit{rad}, \unit{sr}}
1454 & \unit{rad}, \unit{sr}, \unit{deg} (2)
1455 & \unit{rad}, \unit{sr}\\ \hline
1456 Derived units & \multicolumn{5}{c|}{\unit{Hz, N, Pa, J, W, C, V,}} \\
1457 with symbols & \multicolumn{5}{c|}{\unit{S, F, Wb, T, H, lm, lx}} \\
1458 & \unit{$\Omega$} & \unit{ohm} & \unit{Ohm} & \unit{Ohm} & \unit{Ohm}\\\hline
1459 \end{tabular}
1460 \caption[Comparison of base units]{Comparison of base units. Notes: (1) unit is \unit{kg}, but use \unit{g} with prefixes; (2) \unit{deg} preferred for decimal angles}
1461 \label{tabx:comparUnitBase}
1462 \end{table}
1463
1464 %\subsection{Scale factors}
1465
1466 \begin{table}[ht]
1467 \begin{tabular}{|L{0.2\linewidth}|L{0.12\linewidth}|L{0.12\linewidth}|L{0.12\linewidth}|L{0.12\linewidth}|L{0.12\linewidth}|}
1468 \hline
1469 & IAU & OGIP & StdCats sec.~3.2.3 & FITS & VOUnits\\\hline
1470 Scale factors, & \multicolumn{5}{c|}{\unit{d, c, m, n, p, f, a}} \\
1471 (multiple) & \multicolumn{5}{c|}{\unit{da, h, k, M, G, T, P, E}} \\
1472 prefixes & \unit{\micro} & \multicolumn{3}{c|}{\unit{u}} & \unit{u}\\
1473 & & \multicolumn{3}{c|}{\unit{z, y, Z, Y}} & \unit{z, y, Z, Y}\\\hline
1474 Prefix--symbol concatenation & (1) & (2) & no space & no space (implicit) & no space\\\hline
1475 Prefix-able symbols & Not \unit{kg}: use \unit{g} & (3) & all & all & (4) \\\hline
1476 Use compound prefixes & should not & should never & must not & must not & must not\\\hline
1477 \end{tabular}
1478 \caption[Comparison of scale-factors]{Comparison of scale-factors.
1479 Notes: (1) no space, regarded as single symbol;
1480 (2)~no space, regarded as a single unit string;
1481 (3)~all units above, and \unit{eV, pc, Jy, Crab} (only \unit{mCrab} allowed);
1482 (4)~all (except \unit{P} for \unit{a}).}
1483 \label{tabx:comparUnitScale}
1484 \end{table}
1485
1486 \begin{table}[ht]
1487 \begin{tabular}{|L{0.2\linewidth}|L{0.15\linewidth}|L{0.12\linewidth}|L{0.12\linewidth}|L{0.12\linewidth}|L{0.15\linewidth}|}
1488 \hline
1489 & IAU & OGIP & StdCats & FITS & VOUnits\\\hline
1490 minute & \unit{min, $^\mathrm{m}$} & \unit{min} & \unit{min} & \unit{min} & \unit{min}\\\hline
1491 hour & \unit{h, $^\mathrm{h}$} & \unit{h} & \unit{h} & \unit{h} & \unit{h}\\\hline
1492 day & \unit{d, $^\mathrm{d}$} & \unit{d} & \unit{d} & \unit{d} & \unit{d}\\\hline
1493 year & \unit{a} & \unit{yr} & \unit{a, yr} & \unit{a, yr} (1)& like FITS\\\hline
1494 arcsecond & \unit{''} & \unit{arcsec} & \unit{arcsec} & \unit{arcsec} & \unit{arcsec}\\\hline
1495 arcminute & \unit{'} & \unit{arcmin} & \unit{arcmin} & \unit{arcmin} & \unit{arcmin}\\\hline
1496 degree (angle) & \unit{$^\circ$} & \unit{deg} & \unit{deg} & \unit{deg} & \unit{deg}\\\hline
1497 milliarcsecond & \unit{mas} (use \unit{nrad}!) & & \unit{mas} & \unit{mas} & \unit{mas}\\\hline
1498 microarcsec & & & \unit{uarcsec} & & (2)\\\hline
1499 cycle & \unit{c, $^\mathrm{c}$} & & & & not used\\\hline
1500 astronomical unit & \unit{au} & \unit{AU} & \unit{AU} & \unit{AU} & \unit{AU}\\\hline
1501 parsec & \multicolumn{4}{c|}{\unit{pc}} & \unit{pc}\\\hline
1502 atomic mass & \unit{u} & & & \unit{u} & \unit{u}\\\hline
1503 electron volt & \multicolumn{4}{c|}{\unit{eV}} & \unit{eV}\\\hline
1504 jansky & \multicolumn{4}{c|}{\unit{Jy}} & \unit{Jy}\\\hline
1505 celsius degree & \unit{$^\circ$C} for meteorology, other use \unit{K}& & & & not used\\\hline
1506 century & (3)& & & & (4)\\\hline
1507 \end{tabular}
1508 \caption[Comparison of astronomy-related units]{Comparison of astronomy-related units.
1509 Notes: (1) Pa (peta-a) forbidden;
1510 (2) no dedicated symbol, use \unit{uarcsec};
1511 (3) ha, cy should not be used;
1512 (4) no dedicated symbol, use \unit{ha} or \unit{hyr}}
1513 \label{tabx:comparUnitAstro}
1514 \end{table}
1515
1516 \begin{table}[ht]
1517 \begin{tabular}{|L{0.2\linewidth}|L{0.15\linewidth}|L{0.12\linewidth}|L{0.12\linewidth}|L{0.12\linewidth}|L{0.15\linewidth}|}
1518 \hline
1519 & IAU & OGIP & StdCats & FITS & VOUnits\\\hline
1520 %\multicolumn{6}{|c|}{IAU (Table 7) strongly recommends to no longer use these} \\\hline
1521 \aa{}ngstr\"om & \unit{\AA} & \unit{angstrom} & 0.1nm & \unit{Angstrom} & \unit{angstrom}, \unit{Angstrom}\\\hline
1522 micron & \unit{\micro} & & & & not used \\\hline
1523 fermi & no symbol & & & & not used \\\hline
1524 barn & \unit{b} & \unit{barn} & \unit{barn} & \unit{barn} & \unit{barn}\\\hline
1525 cubic centimetre & \unit{cc} & & & & no dedicated symbol\\\hline
1526 dyne & \unit{dyn} & \unit{} & \unit{} & \unit{} & not used \\\hline
1527 erg & \unit{erg} & \unit{erg} & (1) & \unit{erg} & \unit{erg} \\\hline
1528 % erg & \unit{erg} & \unit{erg} & No symbol. \unit{mW/m2} used for erg.cm-2.s-1 & \unit{erg} & \unit{erg} \\\hline
1529 calorie & \unit{cal} & \unit{} & \unit{} & \unit{} & not used \\\hline
1530 bar & \unit{bar} & \unit{} & \unit{} & \unit{} & not used \\\hline
1531 atmosphere & \unit{atm} & \unit{} & \unit{} & \unit{} & not used \\\hline
1532 gal & \unit{Gal} & \unit{} & \unit{} & \unit{} & not used \\\hline
1533 eotvos & \unit{E} & \unit{} & \unit{} & \unit{} & not used \\\hline
1534 gauss & \unit{G} & \unit{G} & \unit{} & \unit{G} & \unit{G} \\\hline
1535 gamma & \unit{$\gamma$} & \unit{} & \unit{} & \unit{} & not used \\\hline
1536 oersted & \unit{Oe} & \unit{} & \unit{} & \unit{} & not used \\\hline
1537 Imperial, non-metric & should not be used & \unit{} & \unit{} & \unit{} & not used \\\hline
1538 \end{tabular}
1539 \caption[Comparison of symbols deprecated by IAU]{Comparison of
1540 symbols deprecated by IAU (from \citet{wilkins89}: ``Table 7. Non-SI
1541 units and symbols whose continued use is deprecated'').
1542 Note: (1) no symbol -- \unit{mW/m2} used for \units{erg\,cm^{-2}\,s^{-1}}.}
1543 \label{tabx:comparUnitDeprecated}
1544 \end{table}
1545
1546 \begin{table}[ht]
1547 \begin{tabular}{|p{0.2\linewidth}|p{0.15\linewidth}|p{0.12\linewidth}|p{0.12\linewidth}|p{0.12\linewidth}|p{0.15\linewidth}|}
1548 \hline
1549 & IAU & OGIP & StdCats & FITS & VOUnits\\\hline
1550 magnitude & \multicolumn{4}{c|}{\unit{mag}} & \unit{mag}\\\hline
1551 rydberg & \unit{} & \unit{} & \unit{Ry} & \unit{Ry} & \multirow{19}{0.15\linewidth}{same as FITS} \\\cline{1-5}
1552 solar mass & \unit{$\mathrm{M}_\odot$} & & \unit{solMass} & \unit{solMass} &\\\cline{1-5}
1553 solar luminosity & \unit{} & \unit{} & \unit{solLum} & \unit{solLum} &\\\cline{1-5}
1554 solar radius & \unit{} & \unit{} & \unit{solRad} & \unit{solRad} &\\\cline{1-5}
1555 light year & \unit{} & \unit{lyr} & \unit{} & \unit{lyr} &\\\cline{1-5}
1556 count & \unit{} & \unit{count} & \unit{ct} & \unit{ct, count} &\\\cline{1-5}
1557 photon & \unit{} & \unit{photon} & \unit{} & \unit{photon, ph} &\\\cline{1-5}
1558 rayleigh & \unit{} & \unit{} & \unit{} & \unit{R} &\\\cline{1-5}
1559 pixel & \unit{} & \unit{pixel} & \unit{pix} & \unit{pix, pixel} &\\\cline{1-5}
1560 debye & \unit{} & \unit{} & \unit{D} & \unit{D} &\\\cline{1-5}
1561 relative to Sun & \unit{} & \unit{} & \unit{Sun} & \unit{Sun} &\\\cline{1-5}
1562 channel & \unit{} & \unit{chan} & \unit{} & \unit{chan} &\\\cline{1-5}
1563 bin & \unit{} & \unit{bin} & \unit{} & \unit{bin} &\\\cline{1-5}
1564 voxel & \unit{} & \unit{voxel} & \unit{} & \unit{voxel} &\\\cline{1-5}
1565 bit & \unit{} & \unit{} & \unit{bit} & \unit{bit} &\\\cline{1-5}
1566 byte & \unit{} & \unit{byte} & \unit{byte} & \unit{byte} &\\\cline{1-5}
1567 adu & \unit{} & \unit{} & \unit{} & \unit{adu} &\\\cline{1-5}
1568 beam & \unit{} & \unit{} & \unit{} & \unit{beam} &\\\hline
1569 & \unit{} & \unit{Crab} avoid use & \unit{} & \unit{} & not used \\\hline
1570 No unit, dimensionless & \unit{} & blank string & \unit{-} & \unit{} & empty string \\\hline
1571 Percent & & & \unit{\%} & & \unit{\%} \\\hline
1572 unknown & \unit{} & {\tiny\unit{UNKNOWN}} & \unit{} & \unit{} & \unit{unknown} \\\hline
1573 \end{tabular}
1574 \caption{Miscellaneous other symbols.}
1575 \label{tabx:comparUnitOther}
1576 \end{table}
1577
1578 \begin{table}[th]
1579 \begingroup
1580 \begin{tabular}{|L{0.2\linewidth}|L{0.15\linewidth}|L{0.15\linewidth}|L{0.15\linewidth}|L{0.15\linewidth}|}
1581 \hline
1582 & IAU & OGIP & StdCats & FITS \\\hline
1583 %\multicolumn{6}{|c|}{Compound units} \\\hline
1584 Multiplication & space or dot (1)
1585 & space or star (2)
1586 & dot
1587 & space or\hfil\break star (3) \\\hline
1588 Division & per (4)
1589 & \unit{/} (5)
1590 & \unit{/}, no space
1591 & \unit{/}, no space\\\hline
1592 Use of multiple / & never
1593 & allowed
1594 & allowed
1595 & discouraged (6) \\\hline
1596 \unit{sym} raised to the power $y$ & superscript
1597 & (7)
1598 & (8)
1599 & (9) \\\hline
1600 Exponential of \unit{sym} & & \unit{exp(sym)} & & \unit{exp(sym)} \\\hline
1601 Natural log of \unit{sym} & & \unit{ln(sym)} & & \unit{ln(sym)} \\\hline
1602 Decimal log of \unit{sym} & & \unit{log(sym)} & \unit{[sym]} & \unit{log(sym)} \\\hline
1603 Square root of \unit{sym} & & \unit{sqrt(sym)} & & \unit{sqrt(sym)} \\\hline
1604 Other math & & (10) & & not used \\\hline
1605 ( ) & & allowed & allowed & optional around powers \\\hline
1606 powers & superscripts & (11) & integers & (12) \\\hline
1607 Numeric factor & not used & (13) & allowed & (14) \\\hline
1608 \end{tabular}
1609 \endgroup
1610 \caption[Mathematical expressions and combinations]{Mathematical expressions and symbol combinations.
1611 \label{tabx:comparUnitCombine}
1612 Notes: (1) space, except if previous unit ends with superscript; dot (\unit{.}) may be used;
1613 (2)~one or more spaces OR one asterisk (\unit{*}) with optional spaces on either side;
1614 (3)~single space OR asterisk (\unit{*}, no spaces) OR dot (\unit{.}, no spaces);
1615 (4)~use negative index or solidus (\unit{/});
1616 (5)~solidus (\unit{/}) with optional spaces on either side, space not recommended after / OR negative index;
1617 (6)~may be used, but discouraged, `math precedence rule';
1618 (7)~\unit{sym**($y$)} parenthesis optional if $y>0$;
1619 (8)~nothing -- \unit{sym$y$}, and use $+/-$ sign for \unit{10+21};
1620 (9)~\unit{sym$y$} OR \unit{sym**($y$)} OR \unit{sym\^{}($y$)}, no space;
1621 (10)~\unit{$f$(sym)}, where $f$ is
1622 \unit{sin}, \unit{cos}, \unit{tan}, \unit{asin}, \unit{acos}, \unit{atan}, \unit{sinh}, \unit{cosh}, \unit{tanh};
1623 (11)~decimal and integer fractions allowed;
1624 (12)~integer (sign and () optional), OR decimal or ratio between ();
1625 (13)~should be avoided; only powers of 10 allowed; should precede any unit string;
1626 (14)~optional 10**k, 10\texttt{\^{}}k, or 10$\pm$k.}
1627 \end{table}
1628
1629 \iffalse
1630 \begin{longtable}[th]{|L{0.2\linewidth}|L{0.2\linewidth}|L{0.12\linewidth}|L{0.12\linewidth}|L{0.22\linewidth}|}
1631 \hline
1632 & IAU & OGIP & StdCats & FITS \\\hline
1633 %\multicolumn{6}{|c|}{Compound units} \\\hline
1634 Multiplication & space, except if previous unit ends with superscript; dot (\unit{.}) may be used
1635 & one or more spaces OR one asterisk (\unit{*}) with optional spaces on either side
1636 & dot (\unit{.}), no space
1637 & single space OR asterisk (\unit{*}, no spaces) OR dot (\unit{.}, no spaces) \\\hline
1638 Division & per. Use negative index or solidus (\unit{/})
1639 & solidus (\unit{/}) with optional spaces on either side, space not recommended after / OR negative index
1640 & \unit{/} with no spaces
1641 & \unit{/} with no spaces \\\hline\hline
1642 Use of multiple / & MUST never use two /
1643 & allowed
1644 & allowed
1645 & may be used, discouraged, math precedence rule \\\hline\hline
1646 \unit{sym} raised to the power $y$ & superscript
1647 & \unit{sym**($y$)} parenthesis optional if $y>0$
1648 & nothing: \unit{sym$y$} use +/- sign for \unit{10+21}
1649 & \unit{sym$y$} OR \unit{sym**($y$)} OR \unit{sym\^{}($y$)}, no space \\\hline\hline
1650 Exponential of \unit{sym} & & \unit{exp(sym)} & & \unit{exp(sym)} \\\hline\hline
1651 Natural log of \unit{sym} & & \unit{ln(sym)} & & \unit{ln(sym)} \\\hline\hline
1652 Decimal log of \unit{sym} & & \unit{log(sym)} & \unit{[sym]} & \unit{log(sym)} dimensionless argument \\\hline\hline
1653 Square root of \unit{sym} & & \unit{sqrt(sym)} & & \unit{sqrt(sym)} \\\hline\hline
1654 Other math & & {\small \unit{sin(sym), cos(sym), tan(sym), asin(sym), acos(sym), atan(sym), sinh(sym), cosh(sym), tanh(sym)} } & & not used \\\hline\hline
1655 ( ) & & allowed & allowed & optional around powers \\\hline\hline
1656 powers & superscripts & decimal and integer fractions allowed & integers only & integer (sign and () optional), OR decimal or ratio between () \\\hline
1657 Numeric factor & not used & should be avoided; only powers of 10 allowed; should precede any unit string & allowed & optional 10**k, 10\verb|^|k, or 10$\pm$k \\\hline\hline
1658 \caption{Comparison of mathematical expressions and symbol combinations.}
1659 \label{tabx:comparUnitCombine}
1660 \end{longtable}
1661 \fi
1662
1663 \clearpage
1664 \section{Formal grammars\label{appx:grammar}}
1665 % These grammars are extracted from http://bitbucket.org/nxg/unity:
1666 % % cd src/grammars
1667 % % make unity-grammars.zip
1668
1669 \emph{Subsection~\ref{appx:vougrammar} is Normative, the other
1670 subsections are Informative.}
1671
1672 In this section we provide formal (yacc-style) grammars for the four
1673 ASCII-based syntaxes discussed in this document. The FITS, OGIP and
1674 CDS grammars are not normative: the corresponding specification
1675 documents do not provide grammars, and instead describe the syntaxes
1676 in text, so that the grammars here are deductions from the
1677 specification text.
1678 This unfortunately means that some of these syntaxes are ambiguous.
1679 These ambiguities are discussed in the sections below. We recommend
1680 that VO applications parse these syntaxes in a way which is consistent
1681 with the grammars here.
1682 %
1683 The grammar for the VOUnits syntax, in \prettyref{appx:vougrammar}, is normative.
1684
1685 We believe that the grammars below are such that if a string
1686 successfully parses in two distinct grammars, it means the same in
1687 both.
1688
1689 The grammars here are from the `Unity' package at
1690 \url{https://bitbucket.org/nxg/unity}, which includes machine-readable
1691 grammars, lists of recommended units, and a collection of test cases. These are also extracted in
1692 machine-readable form
1693 at \url{https://code.google.com/p/volute/source/browse/trunk/projects/std-vounits/unity-grammars.zip}.
1694
1695 In these grammars, the common terminals are as given in
1696 \prettyref{tabx:terminals}. Lexers \norm{must not} swallow whitespace
1697 in generating these terminals; whitespace is permitted in a units
1698 string only where the corresponding grammar permits
1699 the \texttt{WHITESPACE} terminal.
1700
1701 \begin{table}[ht]
1702 \begin{tabular}{rL{9cm}}
1703 \texttt{CARET}&the \texttt{\^{}} character (\hex{5e})\\
1704 \texttt{DIVISION}&the solidus, \texttt{/} (\hex{2f})\\
1705 \texttt{DOT}&the dot/period/full-stop character (\hex{2e})\\
1706 \texttt{FLOAT}&a string matching the regular expression
1707 \texttt{[-+]?[0-9]+\textbackslash.[0-9]+}\\
1708 \texttt{LIT10}&a literal string `\texttt{10}' (the sequence \hex{31} \hex{30})\\
1709 \texttt{OPEN\_P} / \texttt{CLOSE\_P}&parentheses (\hex{28} and \hex{29})\\
1710 \texttt{SIGNED\_INTEGER}&an integer with a required leading sign, so
1711 matching the regular expression \texttt{[-+][0-9]+}\\
1712 \texttt{STAR}&the asterisk (\hex{2a})\\
1713 \texttt{STARSTAR}&a pair of asterisks, \texttt{**}\\
1714 \texttt{STRING}&a non-empty sequence of letters \texttt{[a-zA-Z]+}\\
1715 \texttt{UNSIGNED\_INTEGER}&an integer with no leading sign \texttt{[0-9]+}\\
1716 \texttt{WHITESPACE}&a non-empty string of space characters (\hex{20} only)\\
1717 \end{tabular}
1718 \caption[The terminals used in the grammars]
1719 {\label{tabx:terminals}The terminals used in the grammars; the
1720 notation \hex{nn} indicates hexadecimal ASCII character numbers;
1721 the digits are \hex{30} to \hex{39}, the letters are \hex{41} to \hex{5a} and \hex{61} to
1722 \hex{7a}, and the sign characters are \hex{2b} and \hex{2d}.}
1723 \end{table}
1724
1725 \subsection{The FITS grammar (informative)}
1726 \label{appx:fitsgrammar}
1727
1728 For the FITS units syntax, see section~4.3 of~\cite{pence10}, and its
1729 associated tables. Our preferred FITS grammar is in
1730 \prettyref{tabx:fitsgrammar}.
1731
1732 As noted above in \prettyref{sec:fitsquote},
1733 the FITS specification isn't completely clear on the topic of
1734 solidi, saying ``[t]he IAU style manual forbids
1735 the use of more than one solidus (/) character in a units
1736 string. However, since normal mathematical precedence rules apply
1737 in this context, more than one solidus may be used but is
1738 discouraged''. This does not really resolve the question of whether, for
1739 example, \texttt{kg/m s} should be parsed as \units{kg~m^{-1}~s^{-1}}
1740 or as \units{kg~m^{-1}~s}, since this is a question of both operator
1741 precedence and (left-)associativity, where there might be different
1742 rules internationally, and conflicts between mathematical and
1743 programming-language rules. Most people would \emph{probably} parse
1744 it as \units{kg~m^{-1}~s^{-1}}, but we trust that most educators would
1745 oblige students to rewrite the expression on the grounds that any
1746 ambiguity is too much.
1747 Here, we resolve the ambiguity by declaring that there can
1748 be only a single expression to the right of the solidus.
1749
1750 It is a consequence of this that nothing can be
1751 successfully parsed in two different grammars, with different
1752 meanings. If the right-hand-side of the division could be a
1753 \texttt{product\_of\_units}, then \texttt{kg /m s} would parse in both
1754 the FITS and OGIP syntaxes,
1755 but mean \units{kg~m^{-1}~s^{-1}} in the FITS syntax, and
1756 \units{kg~m^{-1}~s} in the OGIP one.
1757
1758 The FITS specification permits a leading numeric multiplier, but
1759 ``[c]reators of FITS files are encouraged to use the numeric
1760 multiplier only when the available standard scale-factors of [SI] will
1761 not suffice''.
1762
1763 The FITS specification permits \texttt{m(2)}, to indicate the square of
1764 unit~`m'. The grammar has to special-case this, in order to
1765 distinguish it from function application.
1766
1767 Other ambiguities:
1768 \begin{itemize}
1769 \item The FITS specification may or may not be intended to permit
1770 \texttt{10+3 /m}, but we don't.
1771 \item It is possible to read the FITS spec as permitting
1772 \texttt{m\^{}1.5}, without parentheses. We take it to be
1773 invalid here.
1774 \end{itemize}
1775
1776 \clearpage
1777 \begin{table}[t]
1778 \verbatiminput{unity-grammars/unity-fits.txt}
1779 \caption[The FITS grammar]{\label{tabx:fitsgrammar}The FITS grammar.
1780 See \prettyref{appx:fitsgrammar}.}
1781 \end{table}
1782 \clearpage
1783
1784 \subsection{The OGIP grammar (informative)}
1785 \label{appx:ogipgrammar}
1786
1787 For the OGIP units syntax, see \cite{george95}. Our preferred OGIP
1788 grammar is in \prettyref{tabx:ogipgrammar}.
1789
1790 The OGIP specification somewhat reluctantly concedes (in its section
1791 3.2) that ``occasionally it may be preferable to include [leading
1792 scale] factors on the grounds of user-friendliness'', but that ``[t]he
1793 inclusion of numerical factors should therefore be avoided wherever
1794 possible'', and it is ``suggested'' that the scale-factor should in any case
1795 be restricted to powers of~10.
1796 %On these grounds, the \texttt{FLOAT}
1797 %terminal in the grammar should be extra-syntactically restricted to
1798 %being a round power of ten.
1799
1800 Specification ambiguities:
1801 \begin{itemize}
1802 \item The OGIP specification permits a space between the leading
1803 factor and the rest of the unit (by implication from the provided
1804 examples).
1805 \item The specification does not indicate the format of the numerical
1806 factor in the case where it is not a power of ten. We have
1807 suggested \texttt{FLOAT} here (see \prettyref{tabx:terminals}).
1808 \item OGIP \emph{recommends} having no whitespace after the division
1809 solidus, but does not forbid it; therefore we permit it in this
1810 grammar.
1811 \item From its specification text, OGIP appears to permit
1812 \texttt{str1**y}, where \texttt{y} can be a float, even though none
1813 of its examples include this. The same interpretive logic would
1814 appear to permit \texttt{m**3/2}, but this seems to run too great a
1815 risk of being misparsed, and we forbid it here.
1816 \item In the same place, the text suggests that \texttt{str1**y} may
1817 omit the brackets `if~\texttt y is positive', but the context
1818 suggests that the intention is to permit this if~\texttt y is
1819 unsigned. In the grammar here, we permit the omission of the
1820 brackets only if~\texttt y is unsigned -- that is, \texttt{m**+2},
1821 like \texttt{m**-2}, is forbidden.
1822 \end{itemize}
1823
1824 %\clearpage
1825 \begin{table}[ht]
1826 \verbatiminput{unity-grammars/unity-ogip.txt}
1827 \caption[The OGIP grammar]{\label{tabx:ogipgrammar}The OGIP grammar.
1828 Note that the \texttt{FLOAT} in the \texttt{scalefactor} production
1829 must be a power of ten.
1830 See \prettyref{appx:ogipgrammar}.}
1831 \end{table}
1832 \clearpage
1833
1834 \subsection{The CDS grammar (informative)}
1835 \label{appx:cdsgrammar}
1836
1837 For the CDS units syntax, see \cite[\S3.2]{cds00}. Our preferred CDS
1838 grammar is in \prettyref{tabx:cdsgrammar}. It requires additional
1839 terminals, described in \prettyref{tabx:cdsterminals}.
1840
1841 Specification ambiguities:
1842 \begin{itemize}
1843 \item The CDS document indicates that units should be raised to powers by
1844 concatenation of the unit string with an integer, but does so rather
1845 elliptically, so that it is not clear whether \texttt{m+2} is
1846 permitted (the relevant examples show this as \texttt{m2}). We take
1847 this to be permitted in this grammar.
1848 \item The specification does not indicate the format of the numerical
1849 factor in the case where it is not a power of ten and not
1850 a \texttt{CDSFLOAT}. We have suggested \texttt{FLOAT} here
1851 (see \prettyref{tabx:terminals}).
1852 \item The document does not specify or illustrate how \texttt{kg/m/s}
1853 should be parsed. Since the document mentions the OGIP standard (even
1854 though it does not permit OGIP's syntax for powers, \texttt{m**2}), we
1855 take it that this is valid, and equivalent to \units{kg~m^{-1}~s^{-1}}.
1856 \end{itemize}
1857
1858 This specification places no restrictions on the leading scale-factor.
1859
1860 \begin{table}[ht]
1861 \verbatiminput{unity-grammars/unity-cds.txt}
1862 \caption[The CDS grammar]{\label{tabx:cdsgrammar}The CDS grammar.
1863 See \prettyref{appx:cdsgrammar} for discussion,
1864 and \prettyref{tabx:cdsterminals} for the additional terminals.}
1865 \end{table}
1866 \begin{table}[ht]
1867 \begin{tabular}{rL{10cm}}
1868 \texttt{CDSFLOAT}&a string matching the regular
1869 expression \texttt{[0-9]+\textbackslash.[0-9]+x10[-+][0-9]+}
1870 (that is, something resembling \texttt{1.5x10+11})\\
1871 \texttt{OPEN\_SQ}&the open square bracket `\texttt{[}' (indicates logs
1872 in this syntax)\\
1873 \texttt{CLOSE\_SQ}&the close square bracket `\texttt{]}'\\
1874 \texttt{PERCENT}&the percent character `\%'
1875 \end{tabular}
1876 \caption[Extra CDS terminals]{\label{tabx:cdsterminals}Extra terminals
1877 for the CDS grammar}
1878 \end{table}
1879 \clearpage
1880
1881
1882
1883 \subsection{The VOUnits grammar (normative)}
1884 \label{appx:vougrammar}
1885
1886 The VOUnits grammar is defined by this section, by the grammar in
1887 \prettyref{tabx:vougrammar}
1888 (with the terminals of \prettyref{tabx:terminals}
1889 plus the extra ones listed in \prettyref{tabx:vounitsterminals})
1890 and by the list of known units of \prettyref{tabx:knownunits}.
1891
1892 The intention of the VOUnits grammar is that if a VOUnits string
1893 does not use the scale-factor, quoted-units or binary-prefix
1894 extensions
1895 (that is, if it avoids the \texttt{VOUFLOAT}
1896 and \texttt{QUOTED\_STRING} terminals and is restricted to SI decimal prefixes),
1897 then it will be parsable, with the same semantics, by FITS
1898 and CDS parsers, and that it will be parsable by an OGIP parser if
1899 dots are replaced by stars.
1900 See \prettyref{sec:deviations} for discussion.
1901 In particular:
1902 \begin{itemize}
1903 \item The product of units is indicated only by a dot, with no
1904 whitespace: \texttt{N.m}.
1905 \item Raising a unit to a power is done only with a double-star:
1906 \texttt{kg.m**2.s**-2}.
1907 \item There may be at most one division sign at the top level of an
1908 expression.
1909 \end{itemize}
1910
1911 In \prettyref{tabx:vougrammar}, the \texttt{VOUFLOAT} terminal is a
1912 string matching either of the regular expressions
1913 \begin{itemize}
1914 \item\texttt{0\textbackslash.[0-9]+([eE][+-]?[0-9]+)?}
1915 \item\texttt{[1-9][0-9]*(\textbackslash.[0-9]+)?([eE][+-]?[0-9]+)?}
1916 \end{itemize}
1917 (that is, something resembling \texttt{0.123} or \texttt{1.5e+11}).
1918
1919
1920 \begin{table}[ht]
1921 \verbatiminput{unity-grammars/unity-vounits.txt}
1922 \caption[The VOUnits grammar]{\label{tabx:vougrammar}The VOUnits
1923 grammar. See \prettyref{appx:vougrammar} for discussion,
1924 and \prettyref{tabx:vounitsterminals} for additional terminals.}
1925 \end{table}
1926 \begin{table}[ht]
1927 \begin{tabular}{rL{10cm}}
1928 \texttt{VOUFLOAT}&see text, \prettyref{appx:vougrammar}\\
1929 \texttt{QUOTED\_STRING}&a \texttt{STRING} between single quote marks
1930 (ASCII \hex{27})
1931 \end{tabular}
1932 \caption[Extra VOUnits terminals]{\label{tabx:vounitsterminals}Extra terminals
1933 for the VOUnits grammar}
1934 \end{table}
1935 \clearpage
1936
1937 \section{Updates of this document (informative)}
1938 \begin{itemize}
1939 \item 1.0-20131224:
1940 \begin{itemize}
1941 \item Grammar changes: minor (now incorporates the grammars of Unity v0.11).
1942 \item Various clarifications to the text, following on-list discussion.
1943 \end{itemize}
1944 \item 1.0-20131025:
1945 \begin{itemize}
1946 \item Grammar changes: The `\%' character is now treated as a special
1947 case, rather than being a permitted \texttt{STRING} character; it's only
1948 the CDS syntax that permits this character. Some readability
1949 adjustments to the grammars. Unit strings with leading slashes
1950 (eg \unit{/m3}) are no longer supported in the VOUnits syntax.
1951 The grammars now match Unity v0.10.
1952 \item Changed discussion/rationale for forbidding non-ASCII
1953 characters.
1954 \item Clarified that `?' -- which is specified as indicating an
1955 unknown unit -- is not part of the VOUnits grammar, and should be
1956 spotted by a caller before parsing begins.
1957 \item Clarified the extra terminals which some grammars use.
1958 \item Clarified that the ambiguity in \unit{dadu} should remain
1959 unresolved, and the correct behaviour unspecified (is it
1960 deci-\texttt{adu} or deka-\texttt{du}?).
1961 \end{itemize}
1962 \item 1.0-20131011: Changed gramme to gram; removed color property to distinguish arrows in fig.~2;
1963 Removed astro'l unit abbreviation from known-units.tex
1964 \item 1.0-20130922: Responding to RFC and mailing list comments.
1965 Addition of quoted units and arbitrary scale-factor (so updates to
1966 grammars, which now match Unity v0.9). Some reformatting of tables.
1967 \item 1.0-20130724: Rephrasing and clarification, responding to RFC
1968 comments. Update unity grammars to current version (ie, version of 2013-07-22 18:40).
1969 \item 1.0-20130701: Simplified Architecture diagram. Added example
1970 with scientific notation. Adjusted locations of grammar tables to try
1971 to keep them closer to the associated text.
1972 \item 1.0-20130429: Some restructuring, some rephrasing, and a few layout changes.
1973 \item 1.0-20130225: Large tables from section 3 moved to Appendix A. Short summaries of symbols added
1974 to section 3. Changes to table of known units for consistency with text. Added explanations for units Sun and byte.
1975 \item 1.0-20121212:
1976 Minor typographical fixes. Added definition of OGIP. Removed last sentence from acknowledgements, which have been moved to the beginning of the document. Changed figure 1 to move Units in Semantics. Added `discouraged' in first line of \prettyref{tab:VOUnitCombine}. Color change in figure 2 and its label.
1977 \item 1.0-20120801:
1978 Minor typographical fixes
1979 \item 1.0-20120801:
1980 \begin{itemize}
1981 \item Included yacc-style grammars in document.
1982 \end{itemize}
1983 \item 1.0-20120718:
1984 \begin{itemize}
1985 \item Removed external table refs in tables to avoid confusion.
1986 \item Removed refs to SOFA and NOVAS.
1987 \item Clarification of the ``no unit'' case in text.
1988 \item Added formal grammar in annex.
1989 \item Minor editing and typo fixes.
1990 \end{itemize}
1991 \item 1.0-20120521:
1992 \begin{itemize}
1993 \item Typos fixed, removed F. Bonnarel from authors.
1994 \item One sentence rephrased in section 1.2 for clarity.
1995 \item Clarification of \unit{g} and \unit{kg} issue in \prettyref{sec:baseUnits}.
1996 \item Added remark on \unit{Pa} in \prettyref{sec:scaleFactors}.
1997 \item Micro-arcsecond and century explained in \prettyref{tabx:comparUnitAstro}.
1998 \item \prettyref{tabx:comparUnitDeprecated} completed.
1999 \item Added numeric factors in \prettyref{tabx:comparUnitCombine} and discussion in text.
2000 \end{itemize}
2001 \item 1.0-20111216: Major rework of the document.
2002 \item 0.3: initial public release.
2003 %\item version 0.1 to 0.2
2004 % \begin{itemize}
2005 % \item 20090521
2006 % \begin{itemize}
2007 % \item added UCD to Quantity in point 4 of subsection ~\ref{sec:labels}
2008 % \item added `.' in the notation in unit strings in section ~\ref{sec:simpleuse}
2009 % \item added a sentence on the help of UCd in quantity in section ~\ref{sec:UML}
2010 % \end{itemize}
2011 % \item 20090522
2012 % \begin{itemize}
2013 % \item clarified the scope of the model in Section \ref{sec:purpose}
2014 % \item added references in Section \ref{sec:vocab}
2015 % \item added requirement to be consistent with Quantity DM in
2016 % Section~\ref{sec:quantities}
2017 % \item minor clarification and subediting
2018 % \end{itemize}
2019 % \end{itemize}
2020 \end{itemize}
2021
2022 \clearpage
2023 \bibliographystyle{plainnat-eprints}
2024 \bibliography{bib}
2025
2026
2027 \end{document}

Properties

Name Value
svn:keywords Date Revision HeadURL

msdemlei@ari.uni-heidelberg.de
ViewVC Help
Powered by ViewVC 1.1.26