Skip to content

Commit 3d7bb10

Browse files
committed
Drafting paper
1 parent d44fcb6 commit 3d7bb10

File tree

3 files changed

+179
-130
lines changed

3 files changed

+179
-130
lines changed

paper/benchmark.pdf

11.3 KB
Binary file not shown.

paper/paper.bib

Lines changed: 96 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1,59 +1,100 @@
1-
@article{Pearson:2017,
2-
url = {http://adsabs.harvard.edu/abs/2017arXiv170304627P},
3-
Archiveprefix = {arXiv},
4-
Author = {{Pearson}, S. and {Price-Whelan}, A.~M. and {Johnston}, K.~V.},
5-
Eprint = {1703.04627},
6-
Journal = {ArXiv e-prints},
7-
Keywords = {Astrophysics - Astrophysics of Galaxies},
8-
Month = mar,
9-
Title = {{Gaps in Globular Cluster Streams: Pal 5 and the Galactic Bar}},
10-
Year = 2017
11-
}
12-
13-
@book{Binney:2008,
14-
url = {http://adsabs.harvard.edu/abs/2008gady.book.....B},
15-
Author = {{Binney}, J. and {Tremaine}, S.},
16-
Booktitle = {Galactic Dynamics: Second Edition, by James Binney and Scott Tremaine.~ISBN 978-0-691-13026-2 (HB).~Published by Princeton University Press, Princeton, NJ USA, 2008.},
17-
Publisher = {Princeton University Press},
18-
Title = {{Galactic Dynamics: Second Edition}},
19-
Year = 2008
20-
}
21-
22-
@article{gaia,
23-
author = {{Gaia Collaboration}},
24-
title = "{The Gaia mission}",
25-
journal = {Astronomy and Astrophysics},
26-
archivePrefix = "arXiv",
27-
eprint = {1609.04153},
28-
primaryClass = "astro-ph.IM",
29-
keywords = {space vehicles: instruments, Galaxy: structure, astrometry, parallaxes, proper motions, telescopes},
30-
year = 2016,
31-
month = nov,
32-
volume = 595,
33-
doi = {10.1051/0004-6361/201629272},
34-
url = {http://adsabs.harvard.edu/abs/2016A%26A...595A...1G},
35-
}
36-
37-
@article{astropy,
38-
author = {{Astropy Collaboration}},
39-
title = "{Astropy: A community Python package for astronomy}",
40-
journal = {Astronomy and Astrophysics},
41-
archivePrefix = "arXiv",
42-
eprint = {1307.6212},
43-
primaryClass = "astro-ph.IM",
44-
keywords = {methods: data analysis, methods: miscellaneous, virtual observatory tools},
45-
year = 2013,
46-
month = oct,
47-
volume = 558,
48-
doi = {10.1051/0004-6361/201322068},
49-
url = {http://adsabs.harvard.edu/abs/2013A%26A...558A..33A}
50-
}
51-
52-
@misc{fidgit,
53-
author = {A. M. Smith and K. Thaney and M. Hahnel},
54-
title = {Fidgit: An ungodly union of GitHub and Figshare},
1+
@misc{djiparsetxt,
2+
author = {Christian Velez},
3+
title = {Decrypts and parse DJI logs in node},
4+
publisher = {GitHub},
5+
journal = {GitHub repository},
556
year = {2020},
7+
url = {https://github.com/chrisvm/node-djiparsetxt}
8+
}
9+
10+
@misc{libsbp,
11+
author = {{Swift Navigation}},
12+
title = {Swift Binary Protocol client libraries},
5613
publisher = {GitHub},
5714
journal = {GitHub repository},
58-
url = {https://github.com/arfon/fidgit}
15+
year = {2021},
16+
url = {https://github.com/swift-nav/libsbp}
17+
}
18+
19+
@misc{nimrod,
20+
author = {Starbeamrainbowlabs},
21+
title = {Data downloader for the 1km NIMROD rainfall radar data},
22+
publisher = {GitHub},
23+
journal = {GitHub repository},
24+
year = {2021},
25+
url = {https://github.com/sbrl/nimrod-data-downloader}
26+
}
27+
28+
@misc{flexradio,
29+
author = {Stephen Houser},
30+
title = {NodeRed nodes for working with FlexRadio 6xxx series software defined radios},
31+
publisher = {GitHub},
32+
journal = {GitHub repository},
33+
year = {2021},
34+
url = {https://github.com/stephenhouser/node-red-contrib-flexradio}
35+
}
36+
37+
@misc{linky,
38+
author = {Zehir},
39+
publisher = {GitHub},
40+
journal = {GitHub repository},
41+
year = {2021},
42+
url = {https://github.com/Zehir/eesmart-d2l}
43+
}
44+
45+
@misc{maxcul,
46+
author = {Florian Beek},
47+
title = {A pimatic Plugin to control MAX! Heating devices over a Busware CUL stick},
48+
publisher = {GitHub},
49+
journal = {GitHub repository},
50+
year = {2020},
51+
url = {https://github.com/fbeek/pimatic-maxcul}
52+
}
53+
54+
@misc{kaitai,
55+
author = {{Kaitai team}},
56+
title = {Kaitai Struct: declarative language to generate binary data parsers},
57+
publisher = {GitHub},
58+
journal = {GitHub repository},
59+
year = {2021},
60+
url = {https://github.com/kaitai-io/kaitai_struct}
61+
}
62+
63+
@inproceedings{nail,
64+
author={Bangert, Julian and Zeldovich, Nickolai},
65+
booktitle={2014 IEEE Security and Privacy Workshops},
66+
title={Nail: A Practical Interface Generator for Data Formats},
67+
year={2014},
68+
pages={158--166},
69+
doi={10.1109/SPW.2014.31}
70+
}
71+
72+
@inproceedings{nom,
73+
author={Couprie, Geoffroy},
74+
booktitle={2015 IEEE Security and Privacy Workshops},
75+
title={Nom, A Byte oriented, streaming, Zero copy, Parser Combinators Library in Rust},
76+
year={2015},
77+
pages={142--148},
78+
doi={10.1109/SPW.2015.31}
79+
}
80+
81+
@inproceedings{parsifal,
82+
author={Levillain, Olivier},
83+
booktitle={2014 IEEE Security and Privacy Workshops},
84+
title={Parsifal: A Pragmatic Solution to the Binary Parsing Problems},
85+
year={2014},
86+
pages={191--197},
87+
doi={10.1109/SPW.2014.35}
88+
}
89+
90+
@article{monadic,
91+
title={Monadic parsing in Haskell},
92+
volume={8},
93+
doi={10.1017/S0956796898003050},
94+
number={4},
95+
journal={Journal of Functional Programming},
96+
publisher={Cambridge University Press},
97+
author={Hutton, Graham and Meijer, Erik},
98+
year={1998},
99+
pages={437--444}
59100
}

paper/paper.md

Lines changed: 83 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -1,94 +1,102 @@
11
---
2-
title: 'Binary-parser: A blazing-fast declarative parser builder for binary data'
2+
title: 'Binary-parser: A declarative and efficient parser generator for binary data'
33
tags:
44
- JavaScript
5+
- TypeScript
6+
- binary
7+
- parser
58
authors:
69
- name: Keichi Takahashi
710
orcid: 0000-0002-1607-5694
811
affiliation: 1
912
affiliations:
1013
- name: Nara Institute of Science and Technology
11-
date: 21 September 2021
14+
index: 1
15+
date: 27 September 2021
1216
bibliography: paper.bib
1317
---
1418

1519
# Summary
1620

17-
The forces on stars, galaxies, and dark matter under external gravitational
18-
fields lead to the dynamical evolution of structures in the universe. The orbits
19-
of these bodies are therefore key to understanding the formation, history, and
20-
future state of galaxies. The field of "galactic dynamics," which aims to model
21-
the gravitating components of galaxies to study their structure and evolution,
22-
is now well-established, commonly taught, and frequently used in astronomy.
23-
Aside from toy problems and demonstrations, the majority of problems require
24-
efficient numerical tools, many of which require the same base code (e.g., for
25-
performing numerical orbit integration).
21+
This paper presents `binary-parser`, a JavaScript/TypeScript library that
22+
allows users to write high-performance binary parsers, and facilitates the
23+
rapid prototyping of research software that works with binary files and
24+
network protocols. `Binary-parser`'s declarative API is designed such that
25+
expressing complex binary structures is straightforward and easy. In addition
26+
to the high productivity, `binary-parser` utilizes meta-programming to
27+
dynamically generate parser code to achieve parsing performance equivalent
28+
to a hand-written parser. `Binary-parser` is being used by over 700 GitHub
29+
repositories and 120 npm packages as of September 2021.
2630

2731
# Statement of need
2832

29-
`Gala` is an Astropy-affiliated Python package for galactic dynamics. Python
30-
enables wrapping low-level languages (e.g., C) for speed without losing
31-
flexibility or ease-of-use in the user-interface. The API for `Gala` was
32-
designed to provide a class-based and user-friendly interface to fast (C or
33-
Cython-optimized) implementations of common operations such as gravitational
34-
potential and force evaluation, orbit integration, dynamical transformations,
35-
and chaos indicators for nonlinear dynamics. `Gala` also relies heavily on and
36-
interfaces well with the implementations of physical units and astronomical
37-
coordinate systems in the `Astropy` package [@astropy] (`astropy.units` and
38-
`astropy.coordinates`).
39-
40-
`Gala` was designed to be used by both astronomical researchers and by
41-
students in courses on gravitational dynamics or astronomy. It has already been
42-
used in a number of scientific publications [@Pearson:2017] and has also been
43-
used in graduate courses on Galactic dynamics to, e.g., provide interactive
44-
visualizations of textbook material [@Binney:2008]. The combination of speed,
45-
design, and support for Astropy functionality in `Gala` will enable exciting
46-
scientific explorations of forthcoming data releases from the *Gaia* mission
47-
[@gaia] by students and experts alike.
48-
49-
# Mathematics
50-
51-
Single dollars ($) are required for inline mathematics e.g. $f(x) = e^{\pi/x}$
52-
53-
Double dollars make self-standing equations:
54-
55-
$$\Theta(x) = \left\{\begin{array}{l}
56-
0\textrm{ if } x < 0\cr
57-
1\textrm{ else}
58-
\end{array}\right.$$
59-
60-
You can also use plain \LaTeX for equations
61-
\begin{equation}\label{eq:fourier}
62-
\hat f(\omega) = \int_{-\infty}^{\infty} f(x) e^{i\omega x} dx
63-
\end{equation}
64-
and refer to \autoref{eq:fourier} from text.
65-
66-
# Citations
67-
68-
Citations to entries in paper.bib should be in
69-
[rMarkdown](http://rmarkdown.rstudio.com/authoring_bibliographies_and_citations.html)
70-
format.
71-
72-
If you want to cite a software repository URL (e.g. something on GitHub without a preferred
73-
citation) then you can do it with the example BibTeX entry below for @fidgit.
74-
75-
For a quick reference, the following citation commands can be used:
76-
- `@author:2001` -> "Author et al. (2001)"
77-
- `[@author:2001]` -> "(Author et al., 2001)"
78-
- `[@author1:2001; @author2:2001]` -> "(Author1 et al., 2001; Author2 et al., 2002)"
79-
80-
# Figures
81-
82-
Figures can be included like this:
83-
![Caption for example figure.\label{fig:example}](figure.png)
84-
and referenced from text using \autoref{fig:example}.
85-
86-
Figure sizes can be customized by adding an optional second parameter:
87-
![Caption for example figure.](figure.png){ width=20% }
88-
89-
# Acknowledgements
90-
91-
We acknowledge contributions from Brigitta Sipocz, Syrtis Major, and Semyeong
92-
Oh, and support from Kathryn Johnston during the genesis of this project.
33+
Parsing binary data is a ubiquitous task in developing research software. Many
34+
scientific instruments and software tools use proprietary file formats and
35+
network protocols, while open-source libraries to work with them are often
36+
unavailable or limited. In such situations, the programmer has no choice but
37+
to write a binary parser. However, writing a binary parser by hand is
38+
error-prone and tedious because the programmer faces challenges such as
39+
understanding the specification of the binary format, correctly managing the
40+
byte/bit offsets during parsing, and constructing complex data structures as
41+
outputs.
42+
43+
`Binary-parser` significantly reduces the programmer's effort by automatically
44+
generating efficient parser code from a declarative description of the binary
45+
format supplied by the user. The generated parser code is converted to a
46+
JavaScript function and executed for efficient parsing. To accommodate diverse
47+
needs of different users, `binary-parser` exposes various options to ensure
48+
flexibility and provide opportunities for customization.
49+
50+
A large number of software packages have been developed using `binary-parser`
51+
that demonstrate its usefulness and practicality. Some examples include
52+
libraries and applications to work with rainfall radars [@nimrod],
53+
software-defined radio [@flexradio], GNSS receivers [@libsbp], smart meters
54+
[@linky], drones [@djiparsetxt], and thermostats [@maxcul].
55+
56+
# Design
57+
58+
`Binary-parser`'s design is characterized by the following three key features:
59+
60+
1. **Fast**: `Binary-parser` takes advantage of meta-programming to generate
61+
JavaScript source code at runtime from the user's description of the
62+
target binary format. The generated source code is then passed to the
63+
`Function` constructor to dynamically create a function that performs
64+
parsing. This design enables `binary-parser` to achieve parsing
65+
performance comparable to a hand-written parser.
66+
2. **Declarative**: As opposed to parser combinator libraries [@monadic; @nom],
67+
`binary-parser` allows the user to express the target binary format in a
68+
declarative manner, similar to a human-readable network protocol or file
69+
format specification. The user can combine _primitive_ parsers (integers,
70+
floating point numbers, bit fields, strings and bytes) using _composite_
71+
parsers (arrays, choices, nests and pointers) to express a wide variety of
72+
binary formats.
73+
3. **Flexible**: Unlike binary parser generators that use an external Domain
74+
Specific Language (DSL) [@kaitai; @nail], `binary-parser` uses an internal
75+
DSL implemented on top of JavaScript. This design allows the user to
76+
specify most parsing options as return values of user-defined JavaScript
77+
functions that are invoked at runtime. For example, the offset and length
78+
of a field can be computed from another field that has been parsed already.
79+
80+
# Performance evaluation
81+
82+
To evaluate the parsing performance of `binary-parser`, we implemented a small
83+
parser using `binary-parser` (v2.0.1) and three major JavaScript binary parser
84+
libraries: `binparse` (v1.2.1), `structron` (v0.4.3) and `destruct.js` (v0.2.9).
85+
We also implemented the same parser using Node.js's Buffer API as a baseline.
86+
The binary data to be parsed was an array of 1,000 coordinates (each expressed
87+
as three 16-bit integers) preceded by the number of coordinates (a 32-bit
88+
integer). The benchmarks were executed on a MacBook Air (Apple M1 CPU, 2020).
89+
The JavaScript runtime was Node.js (v16.9.1).
90+
91+
![Performance comparison of binary-parser, binparse, structron, destruct.js and a hand-written parser.\label{fig:benchmark}](benchmark.pdf){ width=80% }
92+
93+
\autoref{fig:benchmark} shows the measurement results. Evidently,
94+
`binary-parser` significantly outperforms its alternatives by a factor of
95+
7.5$\times$ to 180$\times$. The plot also reveals that `binary-parser`
96+
achieves performance equal to a hand-written parser.
97+
98+
# Acknowledgments
99+
100+
This work was partly supported by JSPS KAKENHI Grant Number JP20K19808.
93101

94102
# References

0 commit comments

Comments
 (0)