@inproceedings{dmx:hpca:2024,
author={Wang, Shu-Ting and Xu, Hanyang and Mamandipoor, Amin and Mahapatra, Rohan and Ahn, Byung Hoon and Ghodrati, Soroush and Kailas, Krishnan and Alian, Mohammad and Esmaeilzadeh, Hadi},
booktitle={2024 IEEE International Symposium on High-Performance Computer Architecture (HPCA)},
title={Data Motion Acceleration: Chaining Cross-Domain Multi Accelerators},
year={2024},
pages={1043-1062},
}
@inproceedings{accelerator-cluster:hoti:2023,
author = {Bill Dally},
title = {Accelerator Clusters: the New Supercomputer},
year = {2023},
booktitle = {HOTI}
}
@inproceedings{q100:asplos:2014,
author = {Wu, Lisa and Lottarini, Andrea and Paine, Timothy K. and Kim, Martha A. and Ross, Kenneth A.},
title = {Q100: The Architecture and Design of a Database Processing Unit},
year = {2014},
isbn = {9781450323055},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/2541940.2541961},
doi = {10.1145/2541940.2541961},
abstract = {In this paper, we propose Database Processing Units, or DPUs, a class of domain-specific database processors that can efficiently handle database applications. As a proof of concept, we present the instruction set architecture, microarchitecture, and hardware implementation of one DPU, called Q100. The Q100 has a collection of heterogeneous ASIC tiles that process relational tables and columns quickly and energy-efficiently. The architecture uses coarse grained instructions that manipulate streams of data, thereby maximizing pipeline and data parallelism, and minimizing the need to time multiplex the accelerator tiles and spill intermediate results to memory. This work explores a Q100 design space of 150 configurations, selecting three for further analysis: a small, power-conscious implementation, a high-performance implementation, and a balanced design that maximizes performance per Watt. We then demonstrate that the power-conscious Q100 handles the TPC-H queries with three orders of magnitude less energy than a state of the art software DBMS, while the performance-oriented design outperforms the same DBMS by 70X.},
booktitle = {Proceedings of the 19th International Conference on Architectural Support for Programming Languages and Operating Systems},
pages = {255–268},
numpages = {14},
keywords = {dpu, accelerator, microarchitecture, streaming data, database, specialized functional unit},
location = {Salt Lake City, Utah, USA},
series = {ASPLOS '14}
}
@inproceedings{meet-the-walkers:isca:2013,
author = {Kocberber, Onur and Grot, Boris and Picorel, Javier and Falsafi, Babak and Lim, Kevin and Ranganathan, Parthasarathy},
title = {Meet the Walkers: Accelerating Index Traversals for in-Memory Databases},
year = {2013},
isbn = {9781450326384},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/2540708.2540748},
doi = {10.1145/2540708.2540748},
abstract = {The explosive growth in digital data and its growing role in real-time decision support motivate the design of high-performance database management systems (DBMSs). Meanwhile, slowdown in supply voltage scaling has stymied improvements in core performance and ushered an era of power-limited chips. These developments motivate the design of DBMS accelerators that (a) maximize utility by accelerating the dominant operations, and (b) provide flexibility in the choice of DBMS, data layout, and data types. We study data analytics workloads on contemporary in-memory databases and find hash index lookups to be the largest single contributor to the overall execution time. The critical path in hash index lookups consists of ALU-intensive key hashing followed by pointer chasing through a node list. Based on these observations, we introduce Widx, an on-chip accelerator for database hash index lookups, which achieves both high performance and flexibility by (1) decoupling key hashing from the list traversal, and (2) processing multiple keys in parallel on a set of programmable walker units. Widx reduces design cost and complexity through its tight integration with a conventional core, thus eliminating the need for a dedicated TLB and cache. An evaluation of Widx on a set of modern data analytics workloads (TPC-H, TPC-DS) using full-system simulation shows an average speedup of 3.1x over an aggressive OoO core on bulk hash table operations, while reducing the OoO core energy by 83\%.},
booktitle = {Proceedings of the 46th Annual IEEE/ACM International Symposium on Microarchitecture},
pages = {468–479},
numpages = {12},
keywords = {energy efficiency, database indexing, hardware accelerators},
location = {Davis, California},
series = {MICRO-46}
}
@inproceedings{mahapatra:mlarchsys:2022,
title = {Exploring Efficient ML-based Scheduler for Microservices in Heterogeneous Clusters},
author = {Mahapatra, Rohan and Ahn, Byung Hoon and Wang, Shu-Ting and Xu, Hanyang and Esmaeilzadeh, Hadi},
booktitle = {Machine Learning for Computer Architecture and Systems 2022},
year = {2022}
}
@misc{intel-cascade-lake,
title = {Intel Cascade Lake},
url = {https://ark.intel.com/content/www/us/en/ark/products/192447/intel-xeon-gold-6252-processor-35-75m-cache-2-10-ghz.html}
}
@misc{intel-ice-lake,
title = {Intel Ice Lake},
url = {https://ark.intel.com/content/www/us/en/ark/products/212456/intel-xeon-gold-6348-processor-42m-cache-2-60-ghz.html}
}
@misc{intel-sapphire-rapids,
title = {Intel Sapphire Rapids},
url = {https://ark.intel.com/content/www/us/en/ark/products/231750/intel-xeon-platinum-8468h-processor-105m-cache-2-10-ghz.html}
}
@inproceedings{top-down:ispass:2014,
author = {Yasin, Ahmad},
booktitle = {2014 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)},
title = {A Top-Down method for performance analysis and counters architecture},
year = {2014},
pages = {35-44}
}
@inproceedings{in-network-compute:eurosys:2019,
author = {Tokusashi, Yuta and Dang, Huynh Tu and Pedone, Fernando and Soul\'{e}, Robert and Zilberman, Noa},
title = {The Case For In-Network Computing On Demand},
year = {2019},
optabstract = {Programmable network hardware can run services traditionally deployed on servers, resulting in orders-of-magnitude improvements in performance. Yet, despite these performance improvements, network operators remain skeptical of in-network computing. The conventional wisdom is that the operational costs from increased power consumption outweigh any performance benefits. Unless in-network computing can justify its costs, it will be disregarded as yet another academic exercise.In this paper, we challenge that assumption, by providing a detailed power analysis of several in-network computing use cases. Our experiments show that in-network computing can be extremely power-efficient. In fact, for a single watt, a software system on commodity CPU can be improved by a factor of x100 using an FPGA, and a factor of x1000 utilizing ASIC implementations. However, this efficiency depends on the system load. To address changing workloads, we propose in-network computing on demand, where services can be dynamically moved between servers and the network. By shifting the placement of services on-demand, data centers can optimize for both performance and power efficiency.},
booktitle = {Proceedings of the Fourteenth EuroSys Conference 2019},
optseries = {EuroSys '19}
}
@inproceedings{in-network-compute:hotnets:2017,
author = {Sapio, Amedeo and Abdelaziz, Ibrahim and Aldilaijan, Abdulla and Canini, Marco and Kalnis, Panos},
title = {In-Network Computation is a Dumb Idea Whose Time Has Come},
year = {2017},
abstract = {Programmable data plane hardware creates new opportunities for infusing intelligence into the network. This raises a fundamental question: what kinds of computation should be delegated to the network? In this paper, we discuss the opportunities and challenges for co-designing data center distributed systems with their network layer. We believe that the time has finally come for offloading part of their computation to execute in-network. However, in-network computation tasks must be judiciously crafted to match the limitations of the network machine architecture of programmable devices. With the help of our experiments on machine learning and graph analytics workloads, we identify that aggregation functions raise opportunities to exploit the limited computation power of networking hardware to lessen network congestion and improve the overall application performance. Moreover, as a proof-of-concept, we propose Daiet, a system that performs in-network data aggregation. Experimental results with an initial prototype show a large data reduction ratio (86.9\%-89.3\%) and a similar decrease in the workers' computation time.},
booktitle = {Proceedings of the 16th ACM Workshop on Hot Topics in Networks}
}
@inproceedings{drmt:sigcomm:2017,
author = {Chole, Sharad and Fingerhut, Andy and Ma, Sha and Sivaraman, Anirudh and Vargaftik, Shay and Berger, Alon and Mendelson, Gal and Alizadeh, Mohammad and Chuang, Shang-Tse and Keslassy, Isaac and Orda, Ariel and Edsall, Tom},
title = {{dRMT}: Disaggregated Programmable Switching},
year = {2017},
optabstract = {We present dRMT (disaggregated Reconfigurable Match-Action Table), a new architecture for programmable switches. dRMT overcomes two important restrictions of RMT, the predominant pipeline-based architecture for programmable switches: (1) table memory is local to an RMT pipeline stage, implying that memory not used by one stage cannot be reclaimed by another, and (2) RMT is hardwired to always sequentially execute matches followed by actions as packets traverse pipeline stages. We show that these restrictions make it difficult to execute programs efficiently on RMT.dRMT resolves both issues by disaggregating the memory and compute resources of a programmable switch. Specifically, dRMT moves table memories out of pipeline stages and into a centralized pool that is accessible through a crossbar. In addition, dRMT replaces RMT's pipeline stages with a cluster of processors that can execute match and action operations in any order.We show how to schedule a P4 program on dRMT at compile time to guarantee deterministic throughput and latency. We also present a hardware design for dRMT and analyze its feasibility and chip area. Our results show that dRMT can run programs at line rate with fewer processors compared to RMT, and avoids performance cliffs when there are not enough processors to run a program at line rate. dRMT's hardware design incurs a modest increase in chip area relative to RMT, mainly due to the crossbar.},
booktitle = {Proceedings of the Conference of the ACM Special Interest Group on Data Communication},
optkeywords = {disagreggation, RMT, Programmable switching, packet processing},
optseries = {SIGCOMM '17}
}
@inproceedings{rmt:sigcomm:2013,
author = {Bosshart, Pat and Gibb, Glen and Kim, Hun-Seok and Varghese, George and McKeown, Nick and Izzard, Martin and Mujica, Fernando and Horowitz, Mark},
title = {Forwarding Metamorphosis: Fast Programmable Match-Action Processing in Hardware for SDN},
year = {2013},
optabstract = {In Software Defined Networking (SDN) the control plane is physically separate from the forwarding plane. Control software programs the forwarding plane (e.g., switches and routers) using an open interface, such as OpenFlow. This paper aims to overcomes two limitations in current switching chips and the OpenFlow protocol: i) current hardware switches are quite rigid, allowing ``Match-Action'' processing on only a fixed set of fields, and ii) the OpenFlow specification only defines a limited repertoire of packet processing actions. We propose the RMT (reconfigurable match tables) model, a new RISC-inspired pipelined architecture for switching chips, and we identify the essential minimal set of action primitives to specify how headers are processed in hardware. RMT allows the forwarding plane to be changed in the field without modifying hardware. As in OpenFlow, the programmer can specify multiple match tables of arbitrary width and depth, subject only to an overall resource limit, with each table configurable for matching on arbitrary fields. However, RMT allows the programmer to modify all header fields much more comprehensively than in OpenFlow. Our paper describes the design of a 64 port by 10 Gb/s switch chip implementing the RMT model. Our concrete design demonstrates, contrary to concerns within the community, that flexible OpenFlow hardware switch implementations are feasible at almost no additional cost or power.},
booktitle = {Proceedings of the Conference of the ACM Special Interest Group on Data Communication},
optkeywords = {sdn, reconfigurable match tables, rmt model},
optseries = {SIGCOMM '13}
}
@misc{odsa-bow-spec,
title = {{ODSA}-{BoW} Specifications},
url = {https://opencomputeproject.github.io/ODSA-BoW/bow_specification.html}
}
@misc{ucie-spec,
title = {{UCIe} 1.1 Specifications},
url = {https://www.uciexpress.org/specifications}
}
@inproceedings{amd-chiplet:isca:2021,
author = {Naffziger, Samuel and Beck, Noah and Burd, Thomas and Lepak, Kevin and Loh, Gabriel H. and Subramony, Mahesh and White, Sean},
title = {Pioneering Chiplet Technology and Design for the AMD EPYC™ and Ryzen™ Processor Families},
year = {2021},
optabstract = {For decades, Moore's Law has delivered the ability to integrate an exponentially increasing number of devices in the same silicon area at a roughly constant cost. This has enabled tremendous levels of integration, where the capabilities of computer systems that previously occupied entire rooms can now fit on a single integrated circuit.In recent times, the steady drum beat of Moore's Law has started to slow down. Whereas device density historically doubled every 18--24 months, the rate of recent silicon process advancements has declined. While improvements in device scaling continue, albeit at a reduced pace, the industry is simultaneously observing increases in manufacturing costs.In response, the industry is now seeing a trend toward reversing direction on the traditional march toward more integration. Instead, multiple industry and academic groups are advocating that systems on chips (SoCs) be "disintegrated" into multiple smaller "chiplets." This paper details the technology challenges that motivated AMD to use chiplets, the technical solutions we developed for our products, and how we expanded the use of chiplets from individual processors to multiple product families.},
booktitle = {Proceedings of the 48th Annual International Symposium on Computer Architecture},
optkeywords = {chiplets, moore's law, processors, industry, modular},
optseries = {ISCA '21}
}
@inproceedings{ibm-telum-processor:isca:2022,
author = {Lichtenau, Cedric and Buyuktosunoglu, Alper and Bertran, Ramon and Figuli, Peter and Jacobi, Christian and Papandreou, Nikolaos and Pozidis, Haris and Saporito, Anthony and Sica, Andrew and Tzortzatos, Elpida},
title = {{AI} Accelerator on {IBM Telum} Processor: Industrial Product},
year = {2022},
optabstract = {IBM Telum is the next generation processor chip for IBM Z and LinuxONE systems. The Telum design is focused on enterprise class workloads and it achieves over 40\% per socket performance growth compared to IBM z15. The IBM Telum is the first server-class chip with a dedicated on-chip AI accelerator that enables clients to gain real time insights from their data as it is getting processed.Seamlessly infusing AI in all enterprise workloads is highly desirable to get real business insight on every transaction as well as to improve IT operation, security, and data privacy. While it would undeniably provide significant additional value, its application in practice is often accompanied by hurdles from low throughput if run on-platform to security concerns and inconsistent latency if run off-platform. The IBM Telum chip introduces an on-chip AI accelerator that provides consistent low latency and high throughput (over 200 TFLOPS in 32 chip system) inference capacity usable by all threads. The accelerator is memory coherent and directly connected to the fabric like any other general-purpose core to support low latency inference while meeting the system's transaction rate. A scalable architecture providing transparent access to AI accelerator functions via a non-privileged general-purpose core instruction further reduces software orchestration and library complexity as well as provides extensibility to the AI functions. On a global bank customer credit card fraud detection model, the AI accelerator achieves 22\texttimes{} speed up in latency compared to a general purpose core utilizing vector execution units. For the same model, the AI accelerator achieves 116k inferences every second with a latency of only 1.1 msec. As the system is scaled up from one chip to 32 chips, it performs more than 3.5 Million inferences/sec and the latency still stays very low at only 1.2 msec.This paper briefly introduces the IBM Telum chip and later describes the integrated AI accelerator. IBM Telum's AI accelerator architecture, microarchitecture, integration into the system stack, performance, and power are covered in detail.},
booktitle = {Proceedings of the 49th Annual International Symposium on Computer Architecture},
pages = {1012–1028},
optkeywords = {z16, AI on server-class processor, low-latency in-transaction inference, Telum, enterprise workload AI, on-chip AI accelerator},
optlocation = {New York, New York},
optseries = {ISCA '22}
}
@inproceedings{horowitz:isscc:2014,
author = {Horowitz, Mark},
booktitle = {2014 IEEE International Solid-State Circuits Conference Digest of Technical Papers (ISSCC)},
title = {1.1 Computing's energy problem (and what we can do about it)},
year = {2014},
pages = {10-14}
}
@inproceedings{basejump:dac:2018,
author = {Taylor, Michael Bedford},
booktitle = {2018 55th ACM/ESDA/IEEE Design Automation Conference (DAC)},
title = {INVITED: BaseJump STL: SystemVerilog Needs a Standard Template Library for Hardware Design},
year = {2018}
}
@article{blackparrot:ieee-micro:2020,
author = {Petrisko, Daniel and Gilani, Farzam and Wyse, Mark and Jung, Dai Cheol and Davidson, Scott and Gao, Paul and Zhao, Chun and Azad, Zahra and Canakci, Sadullah and Veluri, Bandhav and Guarino, Tavio and Joshi, Ajay and Oskin, Mark and Taylor, Michael Bedford},
journal = {IEEE Micro},
title = {BlackParrot: An Agile Open-Source RISC-V Multicore for Accelerator SoCs},
year = {2020},
volume = {40},
number = {4},
pages = {93-102}
}
@article{democratizing:cacm:2022,
author = {Chi, Yuze and Qiao, Weikang and Sohrabizadeh, Atefeh and Wang, Jie and Cong, Jason},
title = {Democratizing Domain-Specific Computing},
year = {2022},
volume = {66},
number = {1},
abstract = {Creating a programming environment and compilation flow that empowers programmers to create their own DSAs efficiently and affordably on FPGAs.},
journal = {Commun. ACM},
month = {dec},
pages = {74–85}
}
@inproceedings{profiling:isca:2023,
author = {Gonzalez, Abraham and Kolli, Aasheesh and Khan, Samira and Liu, Sihang and Dadu, Vidushi and Karandikar, Sagar and Chang, Jichuan and Asanovic, Krste and Ranganathan, Parthasarathy},
title = {Profiling Hyperscale Big Data Processing},
year = {2023},
optabstract = {Computing demand continues to grow exponentially, largely driven by "big data" processing on hyperscale data stores. At the same time, the slowdown in Moore's law is leading the industry to embrace custom computing in large-scale systems. Taken together, these trends motivate the need to characterize live production traffic on these large data processing platforms and understand the opportunity of acceleration at scale.This paper addresses this key need. We characterize three important production distributed database and data analytics platforms at Google to identify key hardware acceleration opportunities and perform a comprehensive limits study to understand the trade-offs among various hardware acceleration strategies.We observe that hyperscale data processing platforms spend significant time on distributed storage and other remote work across distributed workers. Therefore, optimizing storage and remote work in addition to compute acceleration is critical for these platforms. We present a detailed breakdown of the compute-intensive functions in these platforms and identify dominant key data operations related to datacenter and systems taxes. We observe that no single accelerator can provide a significant benefit but collectively, a sea of accelerators, can accelerate many of these smaller platform-specific functions. We demonstrate the potential gains of the sea of accelerators proposal in a limits study and analytical model. We perform a comprehensive study to understand the trade-offs between accelerator location (on-chip/off-chip) and invocation model (synchronous/asynchronous). We propose and evaluate a chained accelerator execution model where identified compute-intensive functions are accelerated and pipelined to avoid invocation from the core, achieving a 3x improvement over the baseline system while nearly matching identical performance to an ideal fully asynchronous execution model.},
booktitle = {Proceedings of the 50th Annual International Symposium on Computer Architecture},
optseries = {ISCA '23}
}
@article{pymtl3:ieee-micro:2020,
author = {Jiang, Shunning and Pan, Peitian and Ou, Yanghui and Batten, Christopher},
journal = {IEEE Micro},
title = {PyMTL3: A Python Framework for Open-Source Hardware Modeling, Generation, Simulation, and Verification},
year = {2020},
volume = {40},
number = {4},
pages = {58-66}
}
@inproceedings{arc:dac:2012,
title = {Architecture Support for Accelerator-Rich CMPs},
author = {Cong, Jason and Ghodrat, Mohammad Ali and Gill, Michael and Grigorian, Beayna and Reinman, Glenn},
year = 2012,
booktitle = {Proceedings of the 49th Annual Design Automation Conference},
optabstract = {This work discusses a hardware architectural support for accelerator-rich CMPs (ARC). First, we present a hardware resource management scheme for accelerator sharing. This scheme supports sharing and arbitration of multiple cores for a common set of accelerators, and it uses a hardware-based arbitration mechanism to provide feedback to cores to indicate the wait time before a particular resource becomes available. Second, we propose a light-weight interrupt system to reduce the OS overhead of handling interrupts which occur frequently in an accelerator-rich platform. Third, we propose architectural support that allows us to compose a larger virtual accelerator out of multiple smaller accelerators. We have also implemented a complete simulation tool-chain to verify our ARC architecture. Experimental results show significant performance (on average 51X) and energy improvement (on average 17X) compared to approaches using OS-based accelerator management.},
optseries = {DAC '12}
}
@inproceedings{firesim:isca:2018,
title = {{FireSim}: {FPGA}-accelerated Cycle-exact Scale-out System Simulation in the Public Cloud},
author = {Karandikar, Sagar and Mao, Howard and Kim, Donggyu and Biancolin, David and Amid, Alon and Lee, Dayeol and Pemberton, Nathan and Amaro, Emmanuel and Schmidt, Colin and Chopra, Aditya and Huang, Qijing and Kovacs, Kyle and Nikolic, Borivoje and Katz, Randy and Bachrach, Jonathan and Asanovi\'{c}, Krste},
year = 2018,
booktitle = {Proceedings of the 45th Annual International Symposium on Computer Architecture},
optkeywords = {computer architecture, computer networks, computer simulation, data centers, distributed computing, field programmable gate arrays, performance analysis, scalability}
}
@article{ring-all-reduce:jpdc:2009,
title = {Bandwidth Optimal All-Reduce Algorithms for Clusters of Workstations},
author = {Patarasuk, Pitch and Yuan, Xin},
year = 2009,
month = {feb},
journal = {J. Parallel Distrib. Comput.},
volume = 69,
number = 2,
pages = {117–124},
optabstract = {We consider an efficient realization of the all-reduce operation with large data sizes in cluster environments, under the assumption that the reduce operator is associative and commutative. We derive a tight lower bound of the amount of data that must be communicated in order to complete this operation and propose a ring-based algorithm that only requires tree connectivity to achieve bandwidth optimality. Unlike the widely used butterfly-like all-reduce algorithm that incurs network contention in SMP/multi-core clusters, the proposed algorithm can achieve contention-free communication in almost all contemporary clusters, including SMP/multi-core clusters and Ethernet switched clusters with multiple switches. We demonstrate that the proposed algorithm is more efficient than other algorithms on clusters with different nodal architectures and networking technologies when the data size is sufficiently large.},
optnumpages = 8,
optkeywords = {All-reduce, Cluster of workstations, Tree topology, Collective communication}
}
@misc{oneapi,
title = {{oneAPI} Documentation},
url = {https://spec.oneapi.io/versions/latest/index.html}
}
@misc{opencl,
title = {OpenCL API Documentation},
url = {https://man.opencl.org/}
}
@inproceedings{cxl-model:exhet:2022,
title = {Design and Analysis of CXL Performance Models for Tightly-Coupled Heterogeneous Computing},
author = {Cabrera, Anthony M and Young, Aaron R and Vetter, Jeffrey S},
year = 2022,
booktitle = {Proceedings of the 1st International Workshop on Extreme Heterogeneity Solutions},
keywords = {GPU, CXL, GPU-FPGA collaboration, heterogeneous computing, FPGA},
optlocation = {Seoul, Republic of Korea},
optseries = {ExHET '22}
}
@inproceedings{ftrans:ispled:2020,
title = {FTRANS: Energy-Efficient Acceleration of Transformers Using FPGA},
author = {Li, Bingbing and Pandey, Santosh and Fang, Haowen and Lyv, Yanjun and Li, Ji and Chen, Jieyang and Xie, Mimi and Wan, Lipeng and Liu, Hang and Ding, Caiwen},
year = 2020,
booktitle = {ISLPED}
}
@misc{ner-transformer,
title = {Transformers based Named Entity Recognition models},
url = {https://huggingface.co/Jean-Baptiste/roberta-large-ner-english}
}
@misc{broadcom:pcie-switches,
title = {Broadcom PEX88000 Managed PCI Express 4.0 Switches},
url = {https://www.broadcom.com/products/pcie-switches-bridges/expressfabric}
}
@article{axdimm:ieee-micro:2021,
title = {Near-Memory Processing in Action: Accelerating Personalized Recommendation with AxDIMM},
author = {Ke, Liu and Zhang, Xuan and So, Jinin and Lee, Jong-Geon and Kang, Shin-Haeng and Lee, Sukhan and Han, Songyi and Cho, Yeongon and Kim, Jin Hyun and Kwon, Yongsuk and Kim, Kyungsoo and Jung, Jin and Yun, Ilkwon and Park, Sung Joo and Park, Hyunsun and Song, Joonho and Cho, Jeonghyeon and Sohn, Kyomin and Kim, Nam Sung and Lee, Hsien-Hsin Sean},
year = 2021,
journal = {IEEE Micro}
}
@misc{microsoft-azure:zipline:2019,
title = {Azure Zipline},
url = {https://azure.microsoft.com/en-us/blog/improved-cloud-service-performance-through-asic-acceleration/}
}
@misc{samsung-smartSSD:documentation:2020,
title = {SmartSSD Documentation},
url = {https://www.xilinx.com/content/dam/xilinx/support/documents/boards_and_kits/accelerator-cards/1_3/ug1382-smartssd-csd.pdf}
}
@misc{napi:kernel:2022,
title = {NAPI},
url = {https://www.kernel.org/doc/html/next/networking/napi.html}
}
@misc{xilinx-dma_buf:xrt:2022,
title = {Xilinx XRT DMA-BUF API},
url = {https://xilinx.github.io/XRT/master/html/xrt_native_apis.html#dma-buf-api}
}
@misc{dma_buf:kernel:2022,
title = {dma-buf},
url = {https://docs.kernel.org/driver-api/dma-buf.html}
}
@misc{supermicro-sapphire-rapids,
title = {Intel Built-In Accelerators},
url = {https://www.supermicro.com/en/accelerators/intel/built-in-on-demand}
}
@inproceedings{intel-sapphire-rapids:hotchips:2021,
title = {Sapphire Rapids},
author = {Biswas, Arijit},
year = 2021,
booktitle = {Hot Chips}
}
@misc{intel-vtune-top-down,
title = {Intel VTune Top-down Analysis},
url = {https://indico.cern.ch/event/280897/contributions/1628888/attachments/515367/711139/Top_Down_for_CERN_2nd_workshop_-_Ahmad_Yasin.pdf}
}
@misc{intel-vtune,
title = {Intel VTune Profiler},
url = {https://www.intel.com/content/www/us/en/developer/tools/oneapi/vtune-profiler.html}
}
@misc{wiki-spectrogram,
title = {Spectrogram},
url = {https://en.wikipedia.org/wiki/Spectrogram}
}
@misc{wiki-mel-scale,
title = {Mel scale},
url = {https://en.wikipedia.org/wiki/Mel_scale}
}
@article{nitro-for-hpc:ieee-micro:2020,
title = {A Cloud-Optimized Transport Protocol for Elastic and Scalable HPC},
author = {Shalev, Leah and Ayoub, Hani and Bshara, Nafea and Sabbag, Erez},
year = 2020,
journal = {IEEE Micro},
volume = 40,
number = 6
}
@misc{aws-nitro,
title = {{AWS Nitro}},
url = {https://aws.amazon.com/blogs/hpc/bare-metal-performance-with-the-aws-nitro-system/}
}
@inproceedings{accelnet:nsdi:2018,
title = {Azure Accelerated Networking: {SmartNICs} in the Public Cloud},
author = {Daniel Firestone and Andrew Putnam and Sambhrama Mundkur and Derek Chiou and Alireza Dabagh and Mike Andrewartha and Hari Angepat and Vivek Bhanu and Adrian Caulfield and Eric Chung and Harish Kumar Chandrappa and Somesh Chaturmohta and Matt Humphrey and Jack Lavier and Norman Lam and Fengfen Liu and Kalin Ovtcharov and Jitu Padhye and Gautham Popuri and Shachar Raindel and Tejas Sapre and Mark Shaw and Gabriel Silva and Madhan Sivakumar and Nisheeth Srivastava and Anshuman Verma and Qasim Zuhair and Deepak Bansal and Doug Burger and Kushagra Vaid and David A. Maltz and Albert Greenberg},
year = 2018,
booktitle = {NSDI}
}
@inproceedings{coyote:osdi:2020,
title = {Do {OS} abstractions make sense on {FPGAs}?},
author = {Dario Korolija and Timothy Roscoe and Gustavo Alonso},
year = 2020,
booktitle = {OSDI}
}
@inproceedings{optimus-hypervisor:asplos:2020,
title = {A Hypervisor for Shared-Memory FPGA Platforms},
author = {Ma, Jiacheng and Zuo, Gefei and Loughlin, Kevin and Cheng, Xiaohe and Liu, Yanqiang and Eneyew, Abel Mulugeta and Qi, Zhengwei and Kasikci, Baris},
year = 2020,
booktitle = {ASPLOS},
optabstract = {Cloud providers widely deploy FPGAs as application-specific accelerators for customer use. These providers seek to multiplex their FPGAs among customers via virtualization, thereby reducing running costs. Unfortunately, most virtualization support is confined to FPGAs that expose a restrictive, host-centric programming model in which accelerators cannot issue direct memory accesses (DMAs). The host-centric model incurs high runtime overhead for workloads that exhibit pointer chasing. Thus, FPGAs are beginning to support a shared-memory programming model in which accelerators can issue DMAs. However, virtualization support for shared-memory FPGAs is limited. This paper presents Optimus, the first hypervisor that supports scalable shared-memory FPGA virtualization. Optimus offers both spatial multiplexing and temporal multiplexing to provide efficient and flexible sharing of each accelerator on an FPGA. To share the FPGA-CPU interconnect at a high clock frequency, Optimus implements a multiplexer tree. To isolate each guest's address space, Optimus introduces the technique of page table slicing as a hardware-software co-design. To support preemptive temporal multiplexing, Optimus provides an accelerator preemption interface. We show that Optimus supports eight physical accelerators on a single FPGA and improves the aggregate throughput of twelve real-world benchmarks by 1.98x-7x.}
}
@misc{nvidia-dali:2018,
title = {Nvidia DALI},
url = {https://developer.nvidia.com/dali}
}
@misc{gpudirect:2019,
title = {GPUDirect},
url = {https://developer.nvidia.com/gpudirect}
}
@inproceedings{optimusprime:asplos:2020,
title = {Optimus Prime: Accelerating Data Transformation in Servers},
author = {Pourhabibi, Arash and Gupta, Siddharth and Kassir, Hussein and Sutherland, Mark and Tian, Zilu and Drumond, Mario Paulo and Falsafi, Babak and Koch, Christoph},
year = 2020,
booktitle = {ASPLOS}
}
@inproceedings{dcs:micro:2015,
title = {DCS: A Fast and Scalable Device-Centric Server Architecture},
author = {Ahn, Jaehyung and Kwon, Dongup and Kim, Youngsok and Ajdari, Mohammadamin and Lee, Jaewon and Kim, Jangwoo},
year = 2015,
booktitle = {MICRO},
optabstract = {Conventional servers have achieved high performance by employing fast CPUs to run compute-intensive workloads, while making operating systems manage relatively slow I/O devices through memory accesses and interrupts. However, as the emerging workloads are becoming heavily data-intensive and the emerging devices (e.g., NVM storage, high-bandwidth NICs, and GPUs) come to enable low-latency and high-bandwidth device operations, the traditional host-centric server architectures fail to deliver high performance due to their inefficient device handling mechanisms. Furthermore, without resolving the architecture inefficiency, the performance loss will continue to increase as the emerging devices become faster.In this paper, we propose DCS, a novel device-centric server architecture to fully exploit the potential of the emerging devices so that the server performance nicely scales with the performance of the devices. The key idea of DCS is to orchestrate the devices to directly communicate with each other while selectively bypassing the host. The host becomes responsible for only few device-related operations (e.g., filesystem lookup). In this way, DCS achieves high I/O performance by direct inter-device communications and high computation performance by fully utilizing the host-side resources. To implement DCS, we introduce DCS Engine, a custom hardware device to orchestrate devices via standard I/O protocols (i.e., PCIe and NVMe), along with its device driver and user-level library. We show that our FPGA-based DCS prototype significantly improves the performance of emerging server workloads and the architecture will nicely scale with the performance of the devices.}
}
@inproceedings{dcs-ctrl:isca:2018,
title = {DCS-ctrl: A Fast and Flexible Device-Control Mechanism for Device-Centric Server Architecture},
author = {Kwon, Dongup and Ahn, Jaehyung and Chae, Dongju and Ajdari, Mohammadamin and Lee, Jaewon and Bae, Suheon and Kim, Youngsok and Kim, Jangwoo},
year = 2018,
booktitle = {ISCA}
}
@article{tmdmpi:trets:2010,
title = {MPI as a Programming Model for High-Performance Reconfigurable Computers},
author = {Salda\~{n}a, Manuel and Patel, Arun and Madill, Christopher and Nunes, Daniel and Wang, Danyao and Chow, Paul and Wittig, Ralph and Styles, Henry and Putnam, Andrew},
year = 2010,
journal = {ACM Trans. Reconfigurable Technol. Syst.},
volume = 3,
number = 4,
optabstract = {High-Performance Reconfigurable Computers (HPRCs) consist of one or more standard microprocessors tightly-coupled with one or more reconfigurable FPGAs. HPRCs have been shown to provide good speedups and good cost/performance ratios, but not necessarily ease of use, leading to a slow acceptance of this technology. HPRCs introduce new design challenges, such as the lack of portability across platforms, incompatibilities with legacy code, users reluctant to change their code base, a prolonged learning curve, and the need for a system-level Hardware/Software co-design development flow. This article presents the evolution and current work on TMD-MPI, which started as an MPI-based programming model for Multiprocessor Systems-on-Chip implemented in FPGAs, and has now evolved to include multiple X86 processors. TMD-MPI is shown to address current design challenges in HPRC usage, suggesting that the MPI standard has enough syntax and semantics to program these new types of parallel architectures. Also presented is the TMD-MPI Ecosystem, which consists of research projects and tools that are developed around TMD-MPI to further improve HPRC usability. Finally, we present preliminary communication performance measurements.}
}
@inproceedings{tmdmpi:fpl:2006,
title = {TMD-MPI: An MPI Implementation for Multiple Processors Across Multiple FPGAs},
author = {Saldana, Manuel and Chow, Paul},
year = 2006,
booktitle = {FPL}
}
@article{asicclouds:cacm:2020,
title = {ASIC Clouds: Specializing the Datacenter for Planet-Scale Applications},
author = {Taylor, Michael Bedford and Vega, Luis and Khazraee, Moein and Magaki, Ikuo and Davidson, Scott and Richmond, Dustin},
year = 2020,
journal = {CACM}
}
@article{meta-inference-accelerator:arxiv:2021,
title = {First-generation Inference Accelerator Deployment at Facebook},
author = {Anderson, Michael and Chen, Benny and Chen, Stephen and Deng, Summer and Fix, Jordan and Gschwind, Michael and Kalaiah, Aravind and Kim, Changkyu and Lee, Jaewon and Liang, Jason and others},
year = 2021,
journal = {arXiv preprint}
}
@article{meta-training:arxiv:2020,
title = {Deep Learning Training in Facebook Data Centers: Design of Scale-up and Scale-out Systems},
author = {Naumov, Maxim and Kim, John and Mudigere, Dheevatsa and Sridharan, Srinivas and Wang, Xiaodong and Zhao, Whitney and Yilmaz, Serhat and Kim, Changkyu and Yuen, Hector and Ozdal, Mustafa and Nair, Krishnakumar and Gao, Isabel and Su, Bor-Yiing and Yang, Jiyan and Smelyanskiy, Mikhail},
year = 2020,
journal = {arXiv preprint}
}
@misc{meta-mount-shasta:2019,
title = {Mount Shasta for Video Transcoding},
url = {https://engineering.fb.com/2019/03/14/data-center-engineering/accelerating-infrastructure/}
}
@misc{nxp-powerquad:2019,
title = {AN12282: Digital Signal Processing for NXP LPC5500 Using PowerQuad},
url = {https://www.nxp.com/docs/en/application-note/AN12282.pdf}
}
@misc{analog-devices-ffta:2019,
title = {C/C++ Library Manual for SHARC Processors},
url = {https://www.analog.com/media/en/dsp-documentation/softwaremanuals/cces-sharclibrary-manual.pdf}
}
@inproceedings{fftw-acc:hpec:2022,
title = {A High Throughput Hardware Accelerator for FFTW Codelets: A First Look},
author = {Tang, Larry and Chen, Siyuan and Harisrikanth, Keshav and Xu, Guanglin and Mai, Ken and Franchetti, Franz},
year = 2022,
booktitle = {HPEC}
}
@inproceedings{facc:pldi:2022,
title = {Bind the Gap: Compiling Real Software to Hardware FFT Accelerators},
author = {Woodruff, Jackson and Armengol-Estap\'{e}, Jordi and Ainsworth, Sam and O'Boyle, Michael F. P.},
year = 2022,
booktitle = {PLDI},
optabstract = {Specialized hardware accelerators continue to be a source of performance improvement. However, such specialization comes at a programming price. The fundamental issue is that of a mismatch between the diversity of user code and the functionality of fixed hardware, limiting its wider uptake. Here we focus on a particular set of accelerators: those for Fast Fourier Transforms. We present FACC (Fourier ACcelerator Compiler), a novel approach to automatically map legacy code to Fourier Transform accelerators. It automatically generates drop-in replacement adapters using Input-Output (IO)-based program synthesis that bridge the gap between user code and accelerators. We apply FACC to unmodified GitHub C programs of varying complexity and compare against two existing approaches. We target FACC to a high-performance library, FFTW, and two hardware accelerators, the NXP PowerQuad and the Analog Devices FFTA, and demonstrate mean speedups of 9x, 17x and 27x respectively}
}
@inproceedings{rxpsc:dac:2021,
title = {New Regular Expressions on Old Accelerators},
author = {Woodruff, Jackson and O'Boyle, Michael F. P.},
year = 2021,
booktitle = {DAC}
}
@inproceedings{amd-400g-smartnic:hotchips:2022,
title = {AMD 400G Adaptive SmartNIC SoC: Technology preview},
author = {Dastidar, Jaideep and Riddoch, David and Moore, Jason and Pope, Steve and Wesselkamper, Jim},
year = 2022,
booktitle = {2022 IEEE Hot Chips 34 Symposium (HCS)}
}
@inproceedings{auto-nic-offload:asplos:2021,
title = {Autonomous NIC Offloads},
author = {Pismenny, Boris and Eran, Haggai and Yehezkel, Aviad and Liss, Liran and Morrison, Adam and Tsafrir, Dan},
year = 2021,
booktitle = {ASPLOS},
optabstract = {CPUs routinely offload to NICs network-related processing tasks like packet segmentation and checksum. NIC offloads are advantageous because they free valuable CPU cycles. But their applicability is typically limited to layer≤4 protocols (TCP and lower), and they are inapplicable to layer-5 protocols (L5Ps) that are built on top of TCP. This limitation is caused by a misfeature we call ”offload dependence,” which dictates that L5P offloading additionally requires offloading the underlying layer≤4 protocols and related functionality: TCP, IP, firewall, etc. The dependence of L5P offloading hinders innovation, because it implies hard-wiring the complicated, ever-changing implementation of the lower-level protocols. We propose ”autonomous NIC offloads,” which eliminate offload dependence. Autonomous offloads provide a lightweight software-device architecture that accelerates L5Ps without having to migrate the entire layer≤4 TCP/IP stack into the NIC. A main challenge that autonomous offloads address is coping with out-of-sequence packets. We implement autonomous offloads for two L5Ps: (i) NVMe-over-TCP zero-copy and CRC computation, and (ii) https authentication, encryption, and decryption. Our autonomous offloads increase throughput by up to 3.3x, and they deliver CPU consumption and latency that are as low as 0.4x and 0.7x, respectively. Their implementation is already upstreamed in the Linux kernel, and they will be supported in the next-generation of Mellanox NICs.}
}
@inproceedings{regx:micro:2012,
title = {Designing a Programmable Wire-Speed Regular-Expression Matching Accelerator},
author = {Lunteren, Jan Van and Hagleitner, Christoph and Heil, Timothy and Biran, Giora and Shvadron, Uzi and Atasu, Kubilay},
year = 2012,
booktitle = {MICRO}
}
@inproceedings{hare:micro:2016,
title = {HARE: Hardware Accelerator for Regular Expressions},
author = {Gogte, Vaibhav and Kolli, Aasheesh and Cafarella, Michael J. and D'Antoni, Loris and Wenisch, Thomas F.},
year = 2016,
booktitle = {MICRO}
}
@inproceedings{cloud-scale-acc:micro:2016,
title = {A Cloud-Scale Acceleration Architecture},
author = {Caulfield, Adrian M. and Chung, Eric S. and Putnam, Andrew and Angepat, Hari and Fowers, Jeremy and Haselman, Michael and Heil, Stephen and Humphrey, Matt and Kaur, Puneet and Kim, Joo-Young and Lo, Daniel and Massengill, Todd and Ovtcharov, Kalin and Papamichael, Michael and Woods, Lisa and Lanka, Sitaram and Chiou, Derek and Burger, Doug},
year = 2016,
booktitle = {MICRO}
}
@inproceedings{cheetah:hpca:2021,
title = {Cheetah: Optimizing and Accelerating Homomorphic Encryption for Private Inference},
author = {Reagen, Brandon and Choi, Woo-Seok and Ko, Yeongil and Lee, Vincent T. and Lee, Hsien-Hsin S. and Wei, Gu-Yeon and Brooks, David},
year = 2021,
booktitle = {HPCA}
}
@inproceedings{bts:isca:2022,
title = {BTS: An Accelerator for Bootstrappable Fully Homomorphic Encryption},
author = {Kim, Sangpyo and Kim, Jongmin and Kim, Michael Jaemin and Jung, Wonkyung and Kim, John and Rhu, Minsoo and Ahn, Jung Ho},
year = 2022,
booktitle = {ISCA},
optabstract = {Homomorphic encryption (HE) enables the secure offloading of computations to the cloud by providing computation on encrypted data (ciphertexts). HE is based on noisy encryption schemes in which noise accumulates as more computations are applied to the data. The limited number of operations applicable to the data prevents practical applications from exploiting HE. Bootstrapping enables an unlimited number of operations or fully HE (FHE) by refreshing the ciphertext. Unfortunately, bootstrapping requires a significant amount of additional computation and memory bandwidth as well. Prior works have proposed hardware accelerators for computation primitives of FHE. However, to the best of our knowledge, this is the first to propose a hardware FHE accelerator that supports bootstrapping as a first-class citizen.In particular, we propose BTS --- Bootstrappable, Technology-driven, Secure accelerator architecture for FHE. We identify the challenges of supporting bootstrapping in the accelerator and analyze the off-chip memory bandwidth and computation required. In particular, given the limitations of modern memory technology, we identify the HE parameter sets that are efficient for FHE acceleration. Based on the insights gained from our analysis, we propose BTS, which effectively exploits the parallelism innate in HE operations by arranging a massive number of processing elements in a grid. We present the design and microarchitecture of BTS, including a network-on-chip design that exploits a deterministic communication pattern. BTS shows 5,556\texttimes{} and 1,306\texttimes{} improved execution time on ResNet-20 and logistic regression over a CPU, with a chip area of 373.6mm2 and up to 163.2W of power.}
}
@misc{ibm-aiu:2022,
title = {{IBM} Artificial Intelligence Unit},
url = {https://research.ibm.com/blog/ibm-artificial-intelligence-unit-aiu}
}
@inproceedings{ibm-compression-accelerator:isca:2020,
title = {Data Compression Accelerator on IBM POWER9 and z15 Processors : Industrial Product},
author = {Abali, Bulent and Blaner, Bart and Reilly, John and Klein, Matthias and Mishra, Ashutosh and Agricola, Craig B. and Sendir, Bedri and Buyuktosunoglu, Alper and Jacobi, Christian and Starke, William J. and Myneni, Haren and Wang, Charlie},
year = 2020,
booktitle = {ISCA}
}
@inproceedings{google-vcu:asplos:2021,
title = {Warehouse-Scale Video Acceleration: Co-Design and Deployment in the Wild},
author = {Ranganathan, Parthasarathy and Stodolsky, Daniel and Calow, Jeff and Dorfman, Jeremy and Guevara, Marisabel and Smullen IV, Clinton Wills and Kuusela, Aki and Balasubramanian, Raghu and Bhatia, Sandeep and Chauhan, Prakash and Cheung, Anna and Chong, In Suk and Dasharathi, Niranjani and Feng, Jia and Fosco, Brian and Foss, Samuel and Gelb, Ben and Gwin, Sara J. and Hase, Yoshiaki and He, Da-ke and Ho, C. Richard and Huffman Jr., Roy W. and Indupalli, Elisha and Jayaram, Indira and Kongetira, Poonacha and Kyaw, Cho Mon and Laursen, Aaron and Li, Yuan and Lou, Fong and Lucke, Kyle A. and Maaninen, JP and Macias, Ramon and Mahony, Maire and Munday, David Alexander and Muroor, Srikanth and Penukonda, Narayana and Perkins-Argueta, Eric and Persaud, Devin and Ramirez, Alex and Rautio, Ville-Mikko and Ripley, Yolanda and Salek, Amir and Sekar, Sathish and Sokolov, Sergey N. and Springer, Rob and Stark, Don and Tan, Mercedes and Wachsler, Mark S. and Walton, Andrew C. and Wickeraad, David A. and Wijaya, Alvin and Wu, Hon Kwan},
year = 2021,
booktitle = {ASPLOS},
optabstract = {Video sharing (e.g., YouTube, Vimeo, Facebook, TikTok) accounts for the majority of internet traffic, and video processing is also foundational to several other key workloads (video conferencing, virtual/augmented reality, cloud gaming, video in Internet-of-Things devices, etc.). The importance of these workloads motivates larger video processing infrastructures and – with the slowing of Moore’s law – specialized hardware accelerators to deliver more computing at higher efficiencies. This paper describes the design and deployment, at scale, of a new accelerator targeted at warehouse-scale video transcoding. We present our hardware design including a new accelerator building block – the video coding unit (VCU) – and discuss key design trade-offs for balanced systems at data center scale and co-designing accelerators with large-scale distributed software systems. We evaluate these accelerators “in the wild" serving live data center jobs, demonstrating 20-33x improved efficiency over our prior well-tuned non-accelerated baseline. Our design also enables effective adaptation to changing bottlenecks and improved failure management, and new workload capabilities not otherwise possible with prior systems. To the best of our knowledge, this is the first work to discuss video acceleration at scale in large warehouse-scale environments.}
}
@misc{aws-inferentia:2019,
title = {{AWS} Inferentia},
url = {https://aws.amazon.com/machine-learning/inferentia/}
}
@misc{aws-trainium:2022,
title = {{AWS} Trainium},
url = {https://aws.amazon.com/machine-learning/trainium/}
}
@inproceedings{ava:asplos:2020,
title = {AvA: Accelerated Virtualization of Accelerators},
author = {Yu, Hangchen and Peters, Arthur Michener and Akshintala, Amogh and Rossbach, Christopher J.},
year = 2020,
booktitle = {ASPLOS}
}
@inproceedings{synergy:asplos:2021,
title = {Compiler-Driven FPGA Virtualization with SYNERGY},
author = {Landgraf, Joshua and Yang, Tiffany and Lin, Will and Rossbach, Christopher J. and Schkufza, Eric},
year = 2021,
booktitle = {ASPLOS}
}
@inproceedings{gables:hpca:2019,
title = {Gables: A Roofline model for Mobile SoCs},
author = {Hill, Mark and Reddi, Vijay Janapa},
year = 2019,
booktitle = {HPCA}
}
@article{poas:arxiv:2022,
title = {POAS: A High-performance Scheduling Framework For Exploiting Accelerator Level Parallelism},
author = {Mart{\'\i}nez, Pablo Antonio and Bernab{\'e}, Gregorio and Garc{\'\i}a, Jose Manuel},
year = 2022,
journal = {arXiv preprint}
}
@inproceedings{meet-the-walker:micro:2013,
title = {Meet the Walkers: Accelerating Index Traversals for in-Memory Databases},
author = {Kocberber, Onur and Grot, Boris and Picorel, Javier and Falsafi, Babak and Lim, Kevin and Ranganathan, Parthasarathy},
year = 2013,
booktitle = {MICRO}
}
@article{alp:cacm:2021,
title = {Accelerator-level Parallelism},
author = {Hill, Mark D and Reddi, Vijay Janapa},
year = 2021,
journal = {Communications of the ACM},
volume = 64,
number = 12,
pages = {36--38}
}
@inproceedings{robomorphic:asplos:2021,
title = {Robomorphic Computing: A Design Methodology for Domain-Specific Accelerators Parameterized by Robot Morphology},
author = {Neuman, Sabrina M. and Plancher, Brian and Bourgeat, Thomas and Tambe, Thierry and Devadas, Srinivas and Reddi, Vijay Janapa},
year = 2021,
booktitle = {ASPLOS}
}
@inproceedings{dua:nsdi:2019,
title = {Direct Universal Access: Making Data Center Resources Available to {FPGA}},
author = {Ran Shu and Peng Cheng and Guo Chen and Zhiyuan Guo and Lei Qu and Yongqiang Xiong and Derek Chiou and Thomas Moscibroda},
year = 2019,
booktitle = {NSDI}
}
@inproceedings{morpheus:isca:2016,
title = {Morpheus: Creating Application Objects Efficiently for Heterogeneous Computing},
author = {Tseng, Hung-Wei and Zhao, Qianchen and Zhou, Yuxiao and Gahagan, Mark and Swanson, Steven},
year = 2016,
booktitle = {ISCA}
}
@inproceedings{lynx:asplos:2020,
title = {Lynx: A SmartNIC-Driven Accelerator-Centric Architecture for Network Servers},
author = {Tork, Maroun and Maudlej, Lina and Silberstein, Mark},
year = 2020,
booktitle = {ASPLOS}
}
@inproceedings{zeppelin:isscc:2018,
title = {Zeppelin: An SoC for multichip architectures},
author = {Beck, Noah and White, Sean and Paraschou, Milam and Naffziger, Samuel},
year = 2018,
booktitle = {IEEE ISSCC}
}
@misc{xilinx-xdma-perf-limit,
title = {Xilinx XDMA Performance},
url = {https://support.xilinx.com/s/article/68049}
}
@misc{xilinx-xdma,
title = {Xilinx XDMA Driver},
url = {https://github.com/Xilinx/dma_ip_drivers/tree/master/XDMA}
}
@misc{linux-gem-lwn,
title = {LWN.net article on GEM},
url = {https://lwn.net/Articles/283798/}
}
@misc{linux-drm-gem,
title = {Linux kernel DRM-GEM drivers},
url = {https://www.kernel.org/doc/html/latest/gpu/drm-mm.html}
}
@inproceedings{acc-yolov3:iscas:2020,
title = {Accelerating Tiny YOLOv3 using FPGA-Based Hardware/Software Co-Design},
author = {Ahmad, Afzal and Pasha, Muhammad Adeel and Raza, Ghulam Jilani},
year = 2020,
booktitle = {IEEE ISCAS}
}
@article{chiosa:pvldb:2022,
title = {Hardware Acceleration of Compression and Encryption in SAP HANA},
author = {Chiosa, Monica and Maschi, Fabio and M\"{u}ller, Ingo and Alonso, Gustavo and May, Norman},
year = 2022,
journal = {Proc. VLDB Endow.},
volume = 15,
number = 12,
pages = {3277–3291}
}
@inproceedings{doppiodb:fpl:2017,
title = {doppioDB: A hardware accelerated database},
author = {Sidler, David and Owaida, Muhsen and István, Zsolt and Kara, Kaan and Alonso, Gustavo},
year = 2017,
booktitle = {FPL}
}
@inproceedings{casper:fpga:2014,
title = {Hardware Acceleration of Database Operations},
author = {Casper, Jared and Olukotun, Kunle},
year = 2014,
booktitle = {ACM FPGA}
}
@misc{microsoft-presidio,
title = {Presidio: Data Protection and Anonymization SDK},
url = {https://microsoft.github.io/presidio/}
}
@misc{urban-sound-detection,
title = {Urban Sound Detection},
url = {https://urbansounddataset.weebly.com/urbansound8k.html}
}
@inproceedings{rldbs:ijcai:2020,
title = {Reinforcement Learning Framework for Deep Brain Stimulation Study},
author = {Krylov, Dmitrii and des Combes, Remi and Laroche, Romain and Rosenblum, Michael and Dylov, Dmitry V},
year = 2020,
booktitle = {IJCAI}
}
@misc{aws-vt1-instance,
title = {AWS VT1 Instance},
url = {https://xilinx.github.io/video-sdk/v1.5/getting_started_on_vt1.html}
}
@misc{xilinx-vitis-libraries,
title = {Xilinx Vitis Libraries},
url = {https://xilinx.github.io/Vitis_Libraries/}
}
@misc{xilinx-vitis-database,
title = {Xilinx Vitis Database Library},
url = {https://xilinx.github.io/Vitis_Libraries/database/2022.1/index.html}
}
@misc{xilinx-vitis-data-analytics,
title = {Xilinx Vitis Data Analytics Library},
url = {https://xilinx.github.io/Vitis_Libraries/data_analytics/2022.1/index.html}
}
@misc{xilinx-vitis-data-compression,
title = {Xilinx Vitis Data Compression Library},
url = {https://xilinx.github.io/Vitis_Libraries/data_compression/2022.1/index.html}
}
@misc{xilinx-vitis-security,
title = {Xilinx Vitis Security Library},
url = {https://xilinx.github.io/Vitis_Libraries/security/2022.1/index.html}
}
@misc{xilinx-vitis-dsp,
title = {Xilinx Vitis DSP Library},
url = {https://xilinx.github.io/Vitis_Libraries/dsp/2022.1/index.html}
}
@misc{xilinx-u30-vcu,
title = {Xilinx U30 VCU},
url = {https://www.xilinx.com/content/dam/xilinx/support/documents/data_sheets/ds970-u30.pdf}
}
@inproceedings{spin:atc:2017,
title = {{SPIN}: Seamless Operating System Integration of {Peer-to-Peer} {DMA} Between {SSDs} and {GPUs}},
author = {Shai Bergman and Tanya Brokhman and Tzachi Cohen and Mark Silberstein},
year = 2017,
booktitle = {ATC}
}
@inproceedings{p2pdma:apsys:2020,
title = {How Beneficial is Peer-to-Peer DMA?},
author = {Nakamura, Ryo and Kuga, Yohei and Akashi, Kunio},
year = 2020,
booktitle = {APSys}
}
@inproceedings{floem:osdi:2018,
title = {Floem: A Programming System for {NIC-Accelerated} Network Applications},
author = {Phitchaya Mangpo Phothilimthana and Ming Liu and Antoine Kaufmann and Simon Peter and Rastislav Bodik and Thomas Anderson},
year = 2018,
booktitle = {OSDI}
}
@inproceedings{tensorflow:osdi:2016,
title = {{TensorFlow}: A System for {Large-Scale} Machine Learning},
author = {Mart{\'\i}n Abadi and Paul Barham and Jianmin Chen and Zhifeng Chen and Andy Davis and Jeffrey Dean and Matthieu Devin and Sanjay Ghemawat and Geoffrey Irving and Michael Isard and Manjunath Kudlur and Josh Levenberg and Rajat Monga and Sherry Moore and Derek G. Murray and Benoit Steiner and Paul Tucker and Vijay Vasudevan and Pete Warden and Martin Wicke and Yuan Yu and Xiaoqiang Zheng},
year = 2016,
booktitle = {OSDI}
}
@misc{apache:beam,
title = {Apache Beam},
year = 2021,
url = {https://beam.apache.org/}
}
@misc{google:dataflow,
title = {Google Dataflow},
year = 2021,
url = {https://cloud.google.com/dataflow}
}
@inproceedings{naiad:sosp:2013,
title = {Naiad: A Timely Dataflow System},
author = {Murray, Derek G. and McSherry, Frank and Isaacs, Rebecca and Isard, Michael and Barham, Paul and Abadi, Mart\'{\i}n},
year = 2013,
booktitle = {SOSP}
}
@inproceedings{dandelion:sosp:2013,
title = {Dandelion: A Compiler and Runtime for Heterogeneous Systems},
author = {Rossbach, Christopher J. and Yu, Yuan and Currey, Jon and Martin, Jean-Philippe and Fetterly, Dennis},
year = 2013,
booktitle = {SOSP}
}
@inproceedings{logca:isca:2017,
title = {LogCA: A High-Level Performance Model for Hardware Accelerators},
author = {Altaf, Muhammad Shoaib Bin and Wood, David A.},
year = 2017,
booktitle = {ISCA}
}
@article{tf.data:pvldb:2021,
title = {{tf.data}: A Machine Learning Data Processing Framework},
author = {Murray, Derek G. and \v{S}im\v{s}a, Ji\v{r}\'{\i} and Klimovic, Ana and Indyk, Ihor},
year = 2021,
journal = {Proc. VLDB Endow.},
volume = 14,
number = 12
}
@inproceedings{dsi-dlrm:isca:2022,
title = {Understanding Data Storage and Ingestion for Large-Scale Deep Recommendation Model Training: Industrial Product},
author = {Zhao, Mark and Agarwal, Niket and Basant, Aarti and Gedik, Bu\u{g}ra and Pan, Satadru and Ozdal, Mustafa and Komuravelli, Rakesh and Pan, Jerry and Bao, Tianshu and Lu, Haowei and Narayanan, Sundaram and Langman, Jack and Wilfong, Kevin and Rastogi, Harsha and Wu, Carole-Jean and Kozyrakis, Christos and Pol, Parik},
year = 2022,
booktitle = {ISCA}
}
@inproceedings{urbansound-dataset:mm:2014,
title = {A Dataset and Taxonomy for Urban Sound Research},
author = {Salamon, Justin and Jacoby, Christopher and Bello, Juan Pablo},
year = 2014,
booktitle = {ACM Multimedia}
}
@inproceedings{tut-database:eusipco:2016,
title = {TUT database for acoustic scene classification and sound event detection},
author = {Mesaros, Annamaria and Heittola, Toni and Virtanen, Tuomas},
year = 2016,
booktitle = {EUSIPCO}
}
@inproceedings{flexdriver:asplos:2022,
title = {FlexDriver: A Network Driver for Your Accelerator},
author = {Eran, Haggai and Fudim, Maxim and Malka, Gabi and Shalom, Gal and Cohen, Noam and Hermony, Amit and Levi, Dotan and Liss, Liran and Silberstein, Mark},
year = 2022,
booktitle = {ASPLOS}
}
@inproceedings{nds:micro:2021,
title = {NDS: N-Dimensional Storage},
author = {Liu, Yu-Chia and Tseng, Hung-Wei},
year = 2021,
booktitle = {MICRO}
}
@misc{nvidia-v100,
title = {NVIDIA V100 Overview},
url = {https://www.nvidia.com/en-us/data-center/v100/},
howpublished = {\url{https://www.nvidia.com/en-us/data-center/v100/}}
}
@misc{intel-dsa,
title = {Intel Data Streaming Accelerator},
url = {https://www.intel.com/content/www/us/en/develop/articles/intel-data-streaming-accelerator-architecture-specification.html}
}
@inproceedings{protobuf:isca:2021,
title = {A Hardware Accelerator for Protocol Buffers},
author = {Karandikar, Sagar and Leary, Chris and Kennelly, Chris and Zhao, Jerry and Parimi, Dinesh and Nikolic, Borivoje and Asanovic, Krste and Ranganathan, Parthasarathy},
year = 2021,
booktitle = {MICRO}
}
@inproceedings{peltenburg-2019-fletcher,
title = {Fletcher: A Framework to Efficiently Integrate {FPGA} Accelerators with {Apache Arrow}},
author = {Peltenburg, Johan and Van Straten, Jeroen and Wijtemans, Lars and Van Leeuwen, Lars and Al-Ars, Zaid and Hofstee, Peter},
year = 2019,
booktitle = {FPL}
}
@inproceedings{hgum:reconfig:2017,
title = {{HGum}: Messaging Framework for Hardware Accelerators},
author = {Zhang, Sizhuo and Angepat, Hari and Chiou, Derek},
year = 2017,
booktitle = {ReConFig}
}
@inproceedings{llama:socc:2021,
title = {Llama: A Heterogeneous \& Serverless Framework for Auto-Tuning Video Analytics Pipelines},
author = {Romero, Francisco and Zhao, Mark and Yadwadkar, Neeraja J. and Kozyrakis, Christos},
year = 2021,
booktitle = {SoCC}
}
@inproceedings{interstellar:asplos:2020,
title = {Interstellar: Using Halide's Scheduling Language to Analyze DNN Accelerators},
author = {Yang, Xuan and Gao, Mingyu and Liu, Qiaoyi and Setter, Jeff and Pu, Jing and Nayak, Ankita and Bell, Steven and Cao, Kaidi and Ha, Heonjae and Raina, Priyanka and Kozyrakis, Christos and Horowitz, Mark},
year = 2020,
booktitle = {ASPLOS}
}
@inproceedings{decibel:nsdi:2017,
title = {Decibel: Isolation and Sharing in Disaggregated {Rack-Scale} Storage},
author = {Mihir Nanavati and Jake Wires and Andrew Warfield},
year = 2017,
booktitle = {NSDI}
}
@inproceedings{legtchenko:hotstorage:2017,
title = {Understanding {Rack-Scale} Disaggregated Storage},
author = {Sergey Legtchenko and Hugh Williams and Kaveh Razavi and Austin Donnelly and Richard Black and Andrew Douglas and Nathanael Cheriere and Daniel Fryer and Kai Mast and Angela Demke Brown and Ana Klimovic and Andy Slowey and Antony Rowstron},
year = 2017,
booktitle = {HotStorage}
}
@article{do:cacm:2019,
title = {Programmable Solid-State Storage in Future Cloud Datacenters},
author = {Do, Jaeyoung and Sengupta, Sudipta and Swanson, Steven},
year = 2019,
journal = {Commun. ACM},
volume = 62,
number = 6
}
@inproceedings{leapio:asplos:2020,
title = {LeapIO: Efficient and Portable Virtual NVMe Storage on ARM SoCs},
author = {Li, Huaicheng and Hao, Mingzhe and Novakovic, Stanko and Gogte, Vaibhav and Govindan, Sriram and Ports, Dan R. K. and Zhang, Irene and Bianchini, Ricardo and Gunawi, Haryadi S. and Badam, Anirudh},
year = 2020,
booktitle = {ASPLOS}
}
@inproceedings{flash-disaggregation:eurosys:2016,
title = {Flash Storage Disaggregation},
author = {Klimovic, Ana and Kozyrakis, Christos and Thereska, Eno and John, Binu and Kumar, Sanjeev},
year = 2016,
booktitle = {EuroSys}
}
@inproceedings{zhu:cluster:2019,
title = {Efficient User-Level Storage Disaggregation for Deep Learning},
author = {Zhu, Yue and Yu, Weikuan and Jiao, Bing and Mohror, Kathryn and Moody, Adam and Chowdhury, Fahim},
year = 2019,
booktitle = {CLUSTER}
}
@inproceedings{spool:atc:2020,
title = {{Spool}: Reliable Virtualized {NVMe} Storage Pool in Public Cloud Infrastructure},
author = {Shuai Xue and Shang Zhao and Quan Chen and Gang Deng and Zheng Liu and Jie Zhang and Zhuo Song and Tao Ma and Yong Yang and Yanbo Zhou and Keqiang Niu and Sijie Sun and Minyi Guo},
year = 2020,
booktitle = {ATC}
}
@inproceedings{nvmeof-arm:msst:2019,
title = {When NVMe over Fabrics Meets Arm: Performance and Implications},
author = {Jia, Yichen and Anger, Eric and Chen, Feng},
year = 2019,
booktitle = {MSST}
}
@inproceedings{nvmeof:systor:2017,
title = {NVMe-over-Fabrics Performance Characterization and the Path to Low-Overhead Flash Disaggregation},
author = {Guz, Zvika and Li, Harry (Huan) and Shayesteh, Anahita and Balakrishnan, Vijay},
year = 2017,
booktitle = {SYSTOR}
}
@inproceedings{kim:asbd:2017,
title = {How Much Computation Power do you need for Near-Data Processing in Cloud?},
author = {Namhyung Kim and Jeongseob Ahn and Sungpack Hong and Hassan Chafi and Kiyoung Choi},
year = 2017,
booktitle = {ASBD}
}
@inproceedings{clicknp:sigcomm:2016,
title = {ClickNP: Highly Flexible and High Performance Network Processing with Reconfigurable Hardware},
author = {Li, Bojie and Tan, Kun and Luo, Layong (Larry) and Peng, Yanqing and Luo, Renqian and Xu, Ningyi and Xiong, Yongqiang and Cheng, Peng and Chen, Enhong},
year = 2016,
booktitle = {SIGCOMM}
}
@inproceedings{pcie-nic:sigcomm:2018,
title = {Understanding PCIe Performance for End Host Networking},
author = {Neugebauer, Rolf and Antichi, Gianni and Zazo, Jos\'{e} Fernando and Audzevich, Yury and L\'{o}pez-Buedo, Sergio and Moore, Andrew W.},
year = 2018,
booktitle = {SIGCOMM}
}
@inproceedings{memif:asplos:2016,
title = {Memif: Towards Programming Heterogeneous Memory Asynchronously},
author = {Lin, Felix Xiaozhu and Liu, Xu},
year = 2016,
booktitle = {ASPLOS}
}
@misc{aws-s3-latency:2019,
title = {{Amazon CloudWatch Percentiles on Amazon S3}},
year = 2019,
url = {https://aws.amazon.com/blogs/storage/amazon-s3-cloudwatch-percentiles/},
howpublished = {\url{https://aws.amazon.com/blogs/storage/amazon-s3-cloudwatch-percentiles/}}
}
@misc{pcie-p2pdma:lwn:2019,
title = {{Device-to-device memory-transfer offload with P2PDMA}},
year = 2019,
url = {https://lwn.net/Articles/767281/}
}
@misc{pcie-p2pdma:kernel:2018,
title = {{PCI Peer-to-Peer DMA Support}},
year = 2018,
url = {https://docs.kernel.org/driver-api/pci/p2pdma.html}
}
@misc{openwhisk:serverless:2021,
title = {{Apache OpenWhisk}},
url = {https://openwhisk.apache.org/},
howpublished = {\url{https://openwhisk.apache.org/}}
}
@inproceedings{gimbal:sigcomm:2021,
title = {Gimbal: Enabling Multi-Tenant Storage Disaggregation on SmartNIC JBOFs},
author = {Min, Jaehong and Liu, Ming and Chugh, Tapan and Zhao, Chenxingyu and Wei, Andrew and Doh, In Hwan and Krishnamurthy, Arvind},
year = 2021,
booktitle = {SIGCOMM}
}
@inproceedings{i10:osdi:2020,
title = {{TCP}$\approx${RDMA}: {CPU-efficient} Remote Storage Access with i10},
author = {Jaehyun Hwang and Qizhe Cai and Ao Tang and Rachit Agarwal},
year = 2020,
booktitle = {NSDI}
}
@inproceedings{1rma:sigcomm:2020,
title = {{1RMA}: Re-Envisioning Remote Memory Access for Multi-Tenant Datacenters},
author = {Singhvi, Arjun and Akella, Aditya and Gibson, Dan and Wenisch, Thomas F. and Wong-Chan, Monica and Clark, Sean and Martin, Milo M. K. and McLaren, Moray and Chandra, Prashant and Cauble, Rob and Wassel, Hassan M. G. and Montazeri, Behnam and Sabato, Simon L. and Scherpelz, Joel and Vahdat, Amin},
year = 2020,
booktitle = {SIGCOMM}
}
@inproceedings{lim:isca:2009,
title = {Disaggregated Memory for Expansion and Sharing in Blade Servers},
author = {Lim, Kevin and Chang, Jichuan and Mudge, Trevor and Ranganathan, Parthasarathy and Reinhardt, Steven K. and Wenisch, Thomas F.},
year = 2009,
booktitle = {ISCA}
}
@inproceedings{network-for-disaggregation:osdi:2016,
title = {Network Requirements for Resource Disaggregation},
author = {Peter X. Gao and Akshay Narayan and Sagar Karandikar and Joao Carreira and Sangjin Han and Rachit Agarwal and Sylvia Ratnasamy and Scott Shenker},
year = 2016,
booktitle = {OSDI}
}
@inproceedings{reflex:asplos:2017,
title = {ReFlex: Remote Flash $\approx$ Local Flash},
author = {Klimovic, Ana and Litz, Heiner and Kozyrakis, Christos},
year = 2017,
booktitle = {ASPLOS}
}
@misc{azure_serverless_computing:2021,
title = {Azure serverless},
url = {https://azure.microsoft.com/en-us/solutions/serverless/#overview},
howpublished = {\url{https://azure.microsoft.com/en-us/solutions/serverless/#overview}}
}
@misc{google_cloud_functions:2021,
title = {Google Cloud Functions},
url = {https://cloud.google.com/functions/docs/concepts/overview},
howpublished = {\url{https://cloud.google.com/functions/docs/concepts/overview}}
}
@misc{aws_lambda:2021,
title = {AWS Lambda},
url = {https://aws.amazon.com/lambda/},
howpublished = {\url{https://aws.amazon.com/lambda/}}
}
@article{rapl:ieee-micro:2012,
title = {Power-Management Architecture of the Intel Microarchitecture Code-Named Sandy Bridge},
author = {Rotem, Efraim and Naveh, Alon and Ananthakrishnan, Avinash and Weissmann, Eliezer and Rajwan, Doron},
year = 2012,
journal = {IEEE Micro},
volume = 32,
number = 2
}
@article{rapl-action:tompecs:2018,
title = {RAPL in Action: Experiences in Using RAPL for Power Measurements},
author = {Khan, Kashif Nizam and Hirki, Mikael and Niemi, Tapio and Nurminen, Jukka K. and Ou, Zhonghong},
year = 2018,
journal = {ACM Trans. Model. Perform. Eval. Comput. Syst.},
volume = 3,
number = 2
}
@inproceedings{large-scale-ssd:sigmetrics:2015,
title = {A Large-Scale Study of Flash Memory Failures in the Field},
author = {Meza, Justin and Wu, Qiang and Kumar, Sanjeev and Mutlu, Onur},
year = 2015,
booktitle = {SIGMETRICS}
}
@inproceedings{heatwatch:hpca:2018,
title = {HeatWatch: Improving 3D NAND Flash Memory Device Reliability by Exploiting Self-Recovery and Temperature Awareness},
author = {Luo, Yixin and Ghose, Saugata and Cai, Yu and Haratsch, Erich F. and Mutlu, Onur},
year = 2018,
booktitle = {HPCA}
}
@inproceedings{stannis:dac:2020,
title = {Stannis: Low-Power Acceleration of DNN Training Using Computational Storage Devices},
author = {HeydariGorji, Ali and Torabzadehkashi, Mahdi and Rezaei, Siavash and Bobarshad, Hossein and Alves, Vladimir and Chou, Pai H.},
year = 2020,
booktitle = {DAC}
}
@inproceedings{barbalace:cidr:2021,
title = {Computational Storage: Where Are We Today?},
author = {Barbalace, Antonio and Do, Jaeyoung},
year = 2021,
booktitle = {CIDR}
}
@article{asic-cloud:cacm:2020,
title = {ASIC Clouds: Specializing the Datacenter for Planet-Scale Applications},
author = {Taylor, Michael Bedford and Vega, Luis and Khazraee, Moein and Magaki, Ikuo and Davidson, Scott and Richmond, Dustin},
year = 2020,