This file is an extract of zotero-pdf-biblio.bib (Zotero BibTeX export).
Text outside @entry{...} blocks is ignored by BibTeX and serves as commentary.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
@inproceedings{lindenhoferCuriousExplorationMalicious2020,
address = {Valletta, Malta},
title = {A {Curious} {Exploration} of {Malicious} {PDF} {Documents}},
isbn = {978-989-758-399-5},
shorttitle = {A {Curious} {Exploration} of {Malicious} {PDF} {Documents}},
url = {http://www.scitepress.org/DigitalLibrary/Link.aspx?doi=10.5220/0008992305770584},
doi = {10.5220/0008992305770584},
abstract = {The storage, modification and exchange of digital information are core processes in our internet connected world. Common document formats enable this digital information infrastructure. More specifically, the widely used PDF document format is a commodity container for digital information. Although PDF files are a well established format, users may not know that they contain not only simple textual information, but can also embed pieces of program code, sometimes malicious code. This paper explores the capabilities of the PDF format and the potential of its built-in functions for malicious purposes. PDF file processors that implement the full PDF standard also potentially enable credential phishing, loss of privacy, malicious code execution and similar attacks via PDF documents. Furthermore, this paper discusses the results of practically evaluated, working code snippets of PDF feature misuse and strategies to obfuscate and hide malicious code parts in a PDF document, while still conforming to the PDF standard.},
language = {en},
urldate = {2021-03-06},
booktitle = {Proceedings of the 6th {International} {Conference} on {Information} {Systems} {Security} and {Privacy}},
publisher = {SCITEPRESS - Science and Technology Publications},
author = {Lindenhofer, Julian and Offenthaler, Rene and Pirker, Martin},
year = {2020},
pages = {577--584},
file = {Lindenhofer et al. - 2020 - A Curious Exploration of Malicious PDF Documents.pdf:/Users/tullsen/Zotero/storage/U7UF8YVG/Lindenhofer et al. - 2020 - A Curious Exploration of Malicious PDF Documents.pdf:application/pdf},
}
@article{leeGrapplingScaleBornDigital2021,
title = {Grappling with the {Scale} of {Born}-{Digital} {Government} {Publications}: {Toward} {Pipelines} for {Processing} and {Searching} {Millions} of {PDFs}},
shorttitle = {Grappling with the {Scale} of {Born}-{Digital} {Government} {Publications}},
url = {http://arxiv.org/abs/2112.02471},
abstract = {Official government publications are key sources for understanding the history of societies. Web publishing has fundamentally changed the scale and processes by which governments produce and disseminate information. Significantly, a range of web archiving programs have captured massive troves of government publications. For example, hundreds of millions of unique U.S. Government documents posted to the web in PDF form have been archived by libraries to date. Yet, these PDFs remain largely unutilized and understudied in part due to the challenges surrounding the development of scalable pipelines for searching and analyzing them. This paper utilizes a Library of Congress dataset of 1,000 government PDFs in order to offer initial approaches for searching and analyzing these PDFs at scale. In addition to demonstrating the utility of PDF metadata, this paper offers computationally-efficient machine learning approaches to search and discovery that utilize the PDFs' textual and visual features as well. We conclude by detailing how these methods can be operationalized at scale in order to support systems for navigating millions of PDFs.},
urldate = {2022-01-23},
journal = {arXiv:2112.02471 [cs]},
eprint = {2112.02471},
eprinttype = {arXiv},
eprintclass = {cs.DL},
author = {Lee, Benjamin Charles Germain and Owens, Trevor},
month = dec,
year = {2021},
note = {22 pages, 4 figures},
keywords = {Computer Science - Digital Libraries, Computer Science - Information Retrieval},
file = {Lee and Owens - 2021 - Grappling with the Scale of Born-Digital Governmen.pdf:/Users/tullsen/Zotero/storage/QT2WGCAN/Lee and Owens - 2021 - Grappling with the Scale of Born-Digital Governmen.pdf:application/pdf},
}
@inproceedings{djukicDomainspecificModelingDocument2021,
author = {Djukić, Verislav and Tolvanen, Juha-Pekka},
title = {Domain-specific modeling in document engineering},
booktitle = {Proceedings of the 21st {ACM} {Symposium} on {Document} {Engineering}},
publisher = {ACM},
address = {Limerick Ireland},
month = aug,
year = {2021},
pages = {1--2},
isbn = {978-1-4503-8596-1},
doi = {10.1145/3469096.3470949},
url = {https://dl.acm.org/doi/10.1145/3469096.3470949},
urldate = {2021-08-17},
language = {en},
}
@article{mosheExploitingURLParsers2022,
title = {Exploiting {URL} parsers: {The} {Good}, {Bad}, and {Inconsistent}},
url = {https://claroty.com/wp-content/uploads/2022/01/Exploiting-URL-Parsing-Confusion.pdf},
abstract = {The Uniform Resource Locator (URL) is integral to our lives online because we use it for surfing the web, accessing files, and joining video chats. If you click on a URL or type it into a browser, you’re requesting a resource hosted somewhere online. As a result, some devices such as our browsers, applications, and servers must receive our URL, parse it into its uniform resource identifier (URI) components (e.g. hostname, path, etc.) and fetch the requested resource.
The syntax of URLs is complex, and although different libraries can parse them accurately, it is plausible for the same URL
to be parsed differently by different libraries. The confusion in URL parsing can cause unexpected behavior in the software
(e.g. web application), and could be exploited by threat actors to cause denial-of-service conditions, information leaks, or
possibly conduct remote code execution attacks.
In Team82's joint research with Snyk, we examined 16 URL parsing libraries, written in a variety of programming
languages, and noticed some inconsistencies with how each chooses to parse a given URL to its basic components. We
categorized the types of inconsistencies into five categories, and searched for problematic code flows in web applications
and open source libraries that exposed a number of vulnerabilities.
We learned that most of the eight vulnerabilities we found largely occurred for two reasons:
1. Multiple Parsers in Use: Whether by design or an oversight, developers sometimes use more than one URL parsing library in projects. Because some libraries may parse the same URL differently, vulnerabilities could be introduced
into the code.
2. Specification Incompatibility: Different parsing libraries are written according to different RFCs or URL specifications, which creates inconsistencies by design. This also leads to vulnerabilities because developers may not be familiar with the differences between URL specifications and their implications (e.g. what should be checked or sanitized)},
language = {English},
author = {Moshe, Noam and Brizinov, Sharon and Onitza-Klugman, Raul and Efimov, Kirill},
month = jan,
year = {2022},
pages = {34},
file = {Moshe - 2021 - EXPLOITING URL PARSERS THE GOOD, BAD, AND INCONSI.pdf:/Users/tullsen/Zotero/storage/BFA7CJED/Moshe - 2021 - EXPLOITING URL PARSERS THE GOOD, BAD, AND INCONSI.pdf:application/pdf},
}
@misc{noammosheExploitingURLParsing2022,
title = {Exploiting {URL} {Parsing} {Confusion} {Vulnerabilities}},
url = {https://claroty.com/2022/01/10/blog-research-exploiting-url-parsing-confusion/},
abstract = {A joint Claroty Team82-Snyk research collaboration uncovered URL parsing confusion vulnerabilities in popular parsing libraries.},
language = {English},
urldate = {2022-01-15},
howpublished = {Claroty},
author = {Moshe, Noam and Brizinov, Sharon},
month = jan,
year = {2022},
}
@inproceedings{singhPDFClassificationUsing2022,
author = {Singh, Divyanshu and Bhatnagar, Mansi and Yadav, Vrinda},
editor = {Gunjan, Vinit Kumar and Zurada, Jacek M.},
title = {{PDF} {Classification} {Using} {Logistic} {Regression} and {Latent} {Dirichlet} {Allocation}},
booktitle = {Proceedings of the 2nd {International} {Conference} on {Recent} {Trends} in {Machine} {Learning}, {IoT}, {Smart} {Cities} and {Applications}},
series = {Lecture {Notes} in {Networks} and {Systems}},
publisher = {Springer},
address = {Singapore},
month = jan,
year = {2022},
pages = {399--407},
isbn = {9789811664076},
doi = {10.1007/978-981-16-6407-6_36},
url = {https://link.springer.com/chapter/10.1007/978-981-16-6407-6_36},
language = {English},
abstract = {Over the past few years, the classification of documents has been a challenging task. For document classification, no effective method or structured process has been developed so far to assign suitable labels to a large set of documents. Hence there is a need to develop an efficient and accurate method for the classification of PDF documents. The paper proposes a novel method which is capable of classifying PDF documents into most appropriate class and subclass. The proposed framework uses Machine learning with logistic regression classifier as a supervised approach along with Latent Dirichlet allocation (LDA). A combination of two different datasets has been used, the first dataset is ARXIV data of more than 31000 research paper meta data dated between 1992 and 2017 and the second one is created by the authors on their own. The method achieved a significant accuracy comparable with the existing approaches.},
keywords = {LDA, Machine learning, Natural language processing, PDF classification},
}
@incollection{vishnuPDFMalwareClassifiers2022,
author = {Vishnu, N. S. and Lakshmi, Sripada Manasa and Kavita and Verma, Sahil and Shukla, Awadhesh Kumar},
title = {{PDF} {Malware} {Classifiers} – {A} {Survey}, {Future} {Directions} and {Recommended} {Methodology}},
booktitle = {Information {Security} {Handbook}},
edition = {1},
publisher = {CRC Press},
address = {Boca Raton, USA},
month = feb,
year = {2022},
pages = {24},
isbn = {978-0-367-80822-8},
url = {https://www.taylorfrancis.com/chapters/edit/10.1201/9780367808228-7/pdf-malware-classifiers-survey-future-directions-recommended-methodology-vishnu-sripada-manasa-lakshmi-kavita-sahil-verma-awadhesh-kumar-shukla},
language = {English},
abstract = {Malicious software continues to pose a major threat to the cyber world. Text files are the most frequently used vectors to infect various systems using malware. In all this, to execute the attack, the intruder attempts to merge the malignant code with the benevolent text data. Due to its compatibility and lightweight characteristics, PDF (portable document format) is the most widely used file method of sharing documents. In today's world, attackers are using cutting-edge methods to obfuscate malware concealed inside document files. So, it is difficult for malware detection classifiers to effectively identify the text. To understand their design and working procedures, we surveyed different types of learning-based PDF malware classifiers. Also, we have described the pdf document by which we can understand the workings of malware. Finally, we recommended a methodology on the basis of the literature survey and specified the future direction for the better classification results. This work is the extension of dissertation.},
}
@article{alghamdiExtractingToCMetadata2022,
title = {Extracting {ToC} and {Metadata} from {PDF} {Books}: {A} {Rule}-{Based} {Approach}},
shorttitle = {Extracting {ToC} and {Metadata} from {PDF} {Books}},
doi = {10.24507/icicelb.13.02.133},
url = {https://doi.org/10.24507/icicelb.13.02.133},
abstract = {In recent years, e-books in PDF format have been relied upon as a huge repository of printed form of knowledge. More than 150 publishing houses in various academic, industrial, and other fields are interested in developing e-books for ease of use, printing, and sharing over the Internet. However, the methods of automatic extraction of e-book information in PDF format are not easy, and they need continuous improvement and development to enhance the efficient usage of e-books by means of searching over the web, indexing by search engines and archiving/retrieval by the digital libraries. Thus, this paper proposes a high-accuracy rule-based approach to extracting the metadata (book title, author’s name, and year of publication) and table of contents (section number, section title, and section page number) from PDF books. The proposed approach achieved 89.22\% accuracy of information extraction in contrast to classical schemes of literature.},
language = {English},
urldate = {2021-12-29},
journal = {ICIC Express Letters, Part B: Applications},
volume = {13},
number = {2},
issn = {2185-2766},
publisher = {ICIC International},
author = {Alghamdi, Huda and Dawwas, Waad and Almutairi, Taghreed H. and {Atta-ur-Rahman}},
month = feb,
year = {2022},
pages = {133--143},
file = {Alghamdi et al. - 2022 - Extracting ToC and Metadata from PDF Books A Rule.pdf:/Users/tullsen/Zotero/storage/WSZ2YF5B/Alghamdi et al. - 2022 - Extracting ToC and Metadata from PDF Books A Rule.pdf:application/pdf},
}
@inproceedings{reckerGPUAcceleratedPDF2011,
author = {Recker, John and Lin, I.-Jong and Tastl, Ingeborg},
title = {A {GPU} accelerated {PDF} transparency engine},
booktitle = {Parallel {Processing} for {Imaging} {Applications}},
volume = {7872},
publisher = {International Society for Optics and Photonics},
address = {San Francisco, USA},
month = jan,
year = {2011},
pages = {78720T},
doi = {10.1117/12.872568},
url = {https://www.spiedigitallibrary.org/conference-proceedings-of-spie/7872/78720T/A-GPU-accelerated-PDF-transparency-engine/10.1117/12.872568.short},
urldate = {2019-04-22},
language = {English},
abstract = {As commercial printing presses become faster, cheaper and more efficient, so too must the Raster Image Processors (RIP) that prepare data for them to print. Digital press RIPs, however, have been challenged to on the one hand meet the ever increasing print performance of the latest digital presses, and on the other hand process increasingly complex documents with transparent layers and embedded ICC profiles. This paper explores the challenges encountered when implementing a GPU accelerated driver for the open source Ghostscript Adobe PostScript and PDF language interpreter targeted at accelerating PDF transparency for high speed commercial presses. It further describes our solution, including an image memory manager for tiling input and output images and documents, a PDF compatible multiple image layer blending engine, and a GPU accelerated ICC v4 compatible color transformation engine. The result, we believe, is the foundation for a scalable, efficient, distributed RIP system that can meet current and future RIP requirements for a wide range of commercial digital presses.},
note = {HP Labs
https://www.researchgate.net/publication/241307379\_A\_GPU\_accelerated\_PDF\_transparency\_engine},
annote = {Summary
An explanation of how HP enhanced/modified GhostScript (OSS) to utilise GPUs to improve rendering performance. Note that this is from 2010 so well before GPUs were main-stream - and before Adobe (for example) added GPU support to Acrobat and Reader.
Provided as an explanation of how non-traditional software techniques are being applied to PDF parsing...},
file = {Recker et al. - 2011 - A GPU accelerated PDF transparency engine.pdf:/Users/tullsen/Zotero/storage/6SE8FPRF/Recker et al. - 2011 - A GPU accelerated PDF transparency engine.pdf:application/pdf},
}
@article{anuragaImplementationEvaluationPDF2021,
title = {An {Implementation} and {Evaluation} of {PDF} {Password} {Cracking} {Using} {John} the {Ripper} {And} {Crunch}},
volume = {3},
copyright = {Creative Commons Attribution 4.0 International, Open Access},
isbn = {978-93-5426-386-6},
url = {https://zenodo.org/record/5112693},
doi = {10.5281/ZENODO.5112693},
abstract = {It can be challenging to choose the most effective wordmangling rules to apply while undertaking a dictionary-based password cracking attempt. We discuss a new method for generating password structures in the highest possibility order in this work. Based on a training set of previously revealed passwords, we first build an artificial probabilistic context-free grammar. As a result of this grammar, we can generate word-mangling rules and, as a result, password guesses for password cracking. By putting our tools and strategies to the test on genuine password sets, we will show that this strategy appears to be a more effective way to crack passwords than traditional methods. Our approach cracked 28 percent to 129 percent more passwords than John the Ripper, a publicly available standard password cracking software, in one set of testing. We'll construct a wordlist for dictionary attack using the Crunch tool.},
language = {English},
number = {1},
urldate = {2021-12-28},
journal = {Proceedings of the National Conference on Emerging Computer Applications (NCECA)-2021},
author = {{Anurag A} and {Mercy Joseph}},
month = jul,
year = {2021},
note = {Publisher: Zenodo
https://nceca.in/2021/4An\_Implementation\_and\_Evaluation\_of\_PDF\_Password\_Cracking\_Using\_John\_the\_Ripper\_and\_Crunch.pdf},
keywords = {Bruteforce, Crunch, Dictionary Attack, John the Ripper, Kali Linux, PDF, Vulnerabilities},
pages = {18--22},
file = {Anurag A and Mercy Joseph - 2021 - An Implementation and Evaluation of PDF Password C.pdf:/Users/tullsen/Zotero/storage/9FVM8D4W/Anurag A and Mercy Joseph - 2021 - An Implementation and Evaluation of PDF Password C.pdf:application/pdf},
}
@inproceedings{sharifSecuringIntegrityPDF2021,
author = {Sharif, Amer and Ginting, Dewi S. and Dias, Arya D.},
title = {Securing the {Integrity} of {PDF} {Files} using {RSA} {Digital} {Signature} and {SHA}-3 {Hash} {Function}},
booktitle = {2021 {International} {Conference} on {Data} {Science}, {Artificial} {Intelligence}, and {Business} {Analytics} ({DATABIA})},
publisher = {IEEE},
address = {Medan, Indonesia},
month = nov,
year = {2021},
pages = {154--159},
isbn = {978-1-66542-680-0},
doi = {10.1109/DATABIA53375.2021.9650121},
url = {https://ieeexplore.ieee.org/abstract/document/9650121},
copyright = {Copyright IEEE},
language = {English},
abstract = {Signatures are used on documents as written proof that the document was verified by the person indicated. Signature also indicated that the document originated from the signer if the document is transferred to another party. A document maybe in physical print form but may also be a digital print. A digital print requires additional security since a digital document may easily be altered by anyone although the said document is signed using a photographed or scanned signature. One of the means of security is by using the RSA Digital Signature method which is a combination of the RSA algorithm with Digital Signature. RSA algorithm is one of the public key cryptography algorithms, while Digital Signature is a security scheme which may guarantee the authenticity, non-repudiation, and integrity of a file by means of a hash function. This research implemented a web-based combination of RSA Digital Signature with SHA-3 hash function to secure the integrity of PDF files using PHP programming language. The result is a web-based system which could guarantee the authenticity, non repudiation and integrity of PDF files. Testing were carried out on six different sizes of PDF files ranging from 6 KB, up to 23285 KB on three different web browsers: Google Chrome, Microsoft Edge, and Mozilla Firefox. Average processing times of signing and verifying on each browsers were 1.3309 seconds, 1.2565 seconds, and 1.2667 seconds.},
keywords = {authentication, Data science, Databases, digital signature, Distance measurement, file integrity, Hash functions, Keccak, non repudiation, Portable document format, Public key cryptography, Receivers, RSA algorithm, SHA-3, signing, verification},
}
@inproceedings{adhataraoHowArePDF2021,
address = {Montpellier, France},
title = {How are {PDF} files published in the {Scientific} {Community}?},
isbn = {978-1-66541-717-4},
url = {https://ieeexplore.ieee.org/abstract/document/9648374},
doi = {10.1109/WIFS53200.2021.9648374},
abstract = {Authors are often not aware of hidden information and that they can contain more information than the actual content of the file. This work mainly focuses on how PDF files are published in the scientific community. We have analyzed a corpus of 555865 PDF files to show that direct and modified authoring process of PDF creations leads to the leakage of sensitive information on the researchers. Our analysis on the extraction of the metadata has shown that at least 23\% of the PDF files in our dataset contains valuable information on the authoring process. We were even able to solve the co-authorship (multiple authors) problem by crossing the information of multiple PDF files using linear algebra. We believe that, PDF sanitization needs to be included in the scientific publication processes to avoid leakage of sensitive information. We have explored and suggested necessary strategies available for the safer distribution of scientific work by researchers.},
language = {English},
booktitle = {2021 {IEEE} {International} {Workshop} on {Information} {Forensics} and {Security} ({WIFS})},
publisher = {IEEE},
author = {Adhatarao, Supriya and Lauradoux, Cédric},
month = dec,
year = {2021},
note = {ISSN: 2157-4774},
keywords = {Conferences, Cryptography, Data mining, Forensics, Linear algebra, Metadata, PDF files, sanitization},
pages = {1--6},
}
@incollection{strantzAltTextCreatingAccessible2021,
address = {New York, NY, USA},
series = {Proceedings of {SIGDOC}'21},
title = {Beyond ``{Alt}-{Text}'': {Creating} {Accessible} {Data} {Visualizations} with {Code}},
isbn = {978-1-4503-8628-9},
shorttitle = {Beyond ``{Alt}-{Text}''},
url = {https://doi.org/10.1145/3472714.3473661},
doi = {10.1145/3472714.3473661},
abstract = {Data visualization is a reliable tool for professional communication practitioners for synthesizing and presenting data to a variety of audiences. However, data visualizations have a range of accessibility concerns including: visual acuity, color/contrast difficulties, color blindness and size/scale issues. Alt-text is not enough to make these visuals accessible and therefore more advanced web coding techniques, such as the Scalable Vector Graphic (SVG) format should be used to create data visualizations for the web. The use of SVG allows for greater coded semantic and contextual information to be added to data visualizations resulting in graphics that can be better interacted with by users with a variety of accessibility software.},
language = {English},
number = {39},
urldate = {2021-12-20},
booktitle = {The 39th {ACM} {International} {Conference} on {Design} of {Communication}},
publisher = {Association for Computing Machinery},
author = {Strantz, Adam},
month = oct,
year = {2021},
keywords = {Accessibility, Code, Data Visualization, Design, Scalable Vector Graphics},
pages = {331--337},
file = {Strantz - 2021 - Beyond Alt-Text Creating Accessible Data Visual.pdf:/Users/tullsen/Zotero/storage/U6PABFVZ/Strantz - 2021 - Beyond Alt-Text Creating Accessible Data Visual.pdf:application/pdf},
}
@phdthesis{sebekEvaluationUNetMultilabel2021,
author = {Sebek, Fredrik},
title = {An evaluation of {U}-{Net}’s multi-label segmentation performance on {PDF} documents in a medical context},
type = {Master of {Science} - {Machine} {Learning}},
school = {KTH ROYAL INSTITUTE OF TECHNOLOGY, SCHOOL OF ELECTRICAL ENGINEERING AND COMPUTER SCIENCE},
address = {Stockholm, Sweden},
month = aug,
year = {2021},
url = {http://urn.kb.se/resolve?urn=urn:nbn:se:kth:diva-306046},
urldate = {2021-12-19},
language = {English},
abstract = {The Portable Document Format (PDF) is an ideal format for viewing and printing documents. Today many companies store their documents in a PDF format. However, the conversion from a PDF document to any other structured format is inherently difficult. As a result, a lot of the information contained in a PDF document is not directly accessible - this is problematic. Manual intervention is required to accurately convert a PDF into another file format - this can be deemed as both strenuous and exhaustive work. An automated solution to this process could greatly improve the accessibility to information in many companies. A significant amount of literature has investigated the process of extracting information from PDF documents in a structured way. In recent years these methodologies have become heavily dependent on computer vision. The work on this paper evaluates how the U-Net model handles multi-label segmentation on PDF documents in a medical context - extending on Stahl et al.’s work in 2018. Furthermore, it compares two newer extensions of the U-Net model, MultiResUNet (2019) and SS-U-Net (2021). Additionally, it assesses how each of the models performs in a data-sparse environment. The three models were implemented, trained, and then evaluated. Their performance was measured using the Dice coefficient, Jaccard coefficient, and percentage similarity. Furthermore, visual inspection was also used to analyze how the models performed from a perceptual standpoint. The results indicate that both the U-Net and the SS-U-Net are exceptional at segmenting PDF documents effectively in a data abundant environment. However, the SS-U-Net outperformed both the U-Net and the MultiResUNet in the data-sparse environment. Furthermore, the MultiResUNet significantly underperformed in comparison to both the U-Net and SS-U-Net models in both environments. The impressive results achieved by the U-Net and SS-U-Net models suggest that it can be combined with a larger system. 
This proposed system allows for accurate and structured extraction of information from PDF documents.},
note = {https://www.diva-portal.org/smash/record.jsf?pid=diva2\%3A1619512\&dswid=942},
file = {Sebek - 2021 - An evaluation of U-Net’s multi-label segmentation .pdf:/Users/tullsen/Zotero/storage/E6REEGYG/Sebek - 2021 - An evaluation of U-Net’s multi-label segmentation .pdf:application/pdf},
}
@inproceedings{indartaDevelopmentEModuleCourses2021,
author = {Indarta, Yose and Dewi, Ika Parma and Ambiyar and Syahril and Fadhilah and Asnur, Lise and Ranuharja, Fadhli and Samala, Agariadne Dwinggo},
title = {Development of {E}-{Module} {Courses} {Tata} {Boga} 2 {Based} on {Flip} {PDF} {Professional} for {Teaching} {Learning} {Process} in {The} {Pandemic} of {Covid} 19},
publisher = {Atlantis Press},
month = dec,
year = {2021},
pages = {174--179},
isbn = {978-94-6239-479-7},
doi = {10.2991/assehr.k.211208.029},
url = {https://www.atlantis-press.com/proceedings/ictvet-21/125965548},
urldate = {2021-12-17},
language = {en},
abstract = {This study aims to determine the results of the validity and practicality of e-modules made using Flip PDF Professional. The research model used was developed by Borg and Gall which consists of 10 steps but only 7 steps were adapted due to time and funding constraints. The validity test was carried out by 2 experts, namely Material and Media Experts....},
note = {ISSN: 2352-5398},
file = {Indarta et al. - 2021 - Development of E-Module Courses Tata Boga 2 Based .pdf:/Users/tullsen/Zotero/storage/S7C4L9RE/Indarta et al. - 2021 - Development of E-Module Courses Tata Boga 2 Based .pdf:application/pdf},
}
@misc{isotc171sc2wg8ISODTS69122021,
title = {{ISO}/{DTS} 6912 {Document} management – {Portable} {Document} {Format} – {Clarification} for initial graphics state in {ISO} 32000-2 ({PDF} 2.0)},
copyright = {Copyright ISO},
abstract = {This document clarifies the parameter values for the initial graphics state of different types of content streams that are defined in ISO 32000-2, Document management — Portable document format — Part 2: PDF 2.0. This ensures a consistent rendered appearance under all conditions.},
language = {English},
publisher = {ISO},
author = {{ISO TC 171 SC 2 WG 8}},
month = jun,
year = {2021},
}
@inproceedings{kaushikOffensiveApproachHiding2021,
address = {Singapore},
series = {Lecture {Notes} on {Data} {Engineering} and {Communications} {Technologies}},
title = {An {Offensive} {Approach} for {Hiding} {Malicious} {Payloads} in an {Image}},
isbn = {9789811639616},
url = {https://link.springer.com/chapter/10.1007/978-981-16-3961-6_23},
doi = {10.1007/978-981-16-3961-6_23},
abstract = {Steganography is the oldest technique that is been used from century, steganography purpose has not changed, i.e., all these techniques aim at hiding data or protecting data. With the help of steganalysis, the media can be analyzed to check for the presence of any secret information. Nowadays, attackers are making the use of advanced steganography approaches to conceal the secret information and communicate in a stealth manner. In this paper, the authors have discussed about the novel approach to hide malicious payload into image metadata. Therefore, metadata is a data that describes about the image rights and its administration. Hacker generally uses this metadata to perform various malicious attacks such embedding malicious script inside the image metadata and many more.},
language = {English},
booktitle = {Cyber {Security} and {Digital} {Forensics}},
publisher = {Springer},
author = {Kaushik, Keshav and Surana, Sneha},
editor = {Khanna, Kavita and Estrela, Vania Vieira and Rodrigues, Joel José Puga Coelho},
month = oct,
year = {2021},
keywords = {Metadata, Cyber forensics, Cybersecurity, Digital forensics, EXIF, Image steganography, Payload, Stager, Steganalysis, Steganography},
pages = {265--272},
}
@article{beckwithNeedleHaystackDetecting2021,
title = {Needle in a {Haystack}: {Detecting} {Subtle} {Malicious} {Edits} to {Additive} {Manufacturing} {G}-code {Files}},
issn = {1943-0671},
shorttitle = {Needle in a {Haystack}},
url = {https://ieeexplore.ieee.org/abstract/document/9619477},
doi = {10.1109/LES.2021.3129108},
abstract = {Increasing usage of Digital Manufacturing (DM) in safety-critical domains is increasing attention on the cybersecurity of the manufacturing process, as malicious third parties might aim to introduce defects in digital designs. In general, the DM process involves creating a digital object (as CAD files) before using a slicer program to convert the models into printing instructions (e.g. g-code) suitable for the target printer. As the g-code is an intermediate machine format, malicious edits may be difficult to detect, especially when the golden (original) models are not available to the manufacturer. In this work we aim to quantify this hypothesis through a red-team/blue-team case study, whereby the red-team aims to introduce subtle defects that would impact the properties (strengths) of the 3D printed parts, and the blue-team aims to detect these modifications in the absence of the golden models. The case study had two sets of models, the first with 180 designs (with 2 compromised using 2 methods) and the second with 4320 designs (with 60 compromised using 6 methods). Using statistical modelling and machine learning (ML), the blue-team was able to detect all the compromises in the first set of data, and 50 of the compromises in the second.},
journal = {IEEE Embedded Systems Letters},
author = {Beckwith, Caleb and Naicker, Harsh Sankar and Mehta, Svara and Udupa, Viba R. and Nim, Nghia Tri and Gadre, Varun and Pearce, Hammond and Mac, Gary and Gupta, Nikhil},
month = nov,
year = {2021},
note = {Conference Name: IEEE Embedded Systems Letters},
keywords = {Machine learning, Clustering algorithms, Manufacturing, Principal component analysis, Printers, Solid modeling, Three-dimensional displays},
pages = {1--1},
}
@inproceedings{hussainReviewMaliciousAltering2021,
address = {Zallaq, Bahrain},
title = {A {Review} of {Malicious} {Altering} {Healthcare} {Imagery} using {Artificial} {Intelligence}},
isbn = {978-1-66544-032-5},
url = {https://ieeexplore.ieee.org/document/9582068/},
doi = {10.1109/3ICT53449.2021.9582068},
abstract = {During the second half of 2020, healthcare is and has been the number one target for cybercrime, enormous amount of cyberattacks on hospitals and health systems increased, and specialists trust there are more to come. Attackers who can get the way to reach the electronic health record would exploit it and will use it for their own interest like deal or vend it on the underground economy, hostage the systems and the sensitive data, that has a significant impact on operations. This review tried to analyze how cyber attacker employ Generative Adversarial Networks (GANs) to alter the evidences of patient’s medical conditions from image scans and reports. Cyber attacker has different purposes in order to obstruct a political applicant, lockup investigations, obligate insurance scam, execute an act of violence, or even commit homicide. Numerous correlated works constructed on gan in medical images practices had been reviews in the period between 2000 to 2021. Many papers showed how hospital system, physicians and radiology’s specialists and the most recent researches showed an extremely exposed to different types of intrusion gan attacks.},
language = {en},
urldate = {2021-11-20},
booktitle = {2021 {International} {Conference} on {Innovation} and {Intelligence} for {Informatics}, {Computing}, and {Technologies} ({3ICT})},
publisher = {IEEE},
author = {Hussain, Fadheela and Ksantini, Riadh and Hammad, Mustafa},
month = sep,
year = {2021},
pages = {646--651},
file = {Hussain et al. - 2021 - A Review of Malicious Altering Healthcare Imagery .pdf:/Users/tullsen/Zotero/storage/XQWBQQKF/Hussain et al. - 2021 - A Review of Malicious Altering Healthcare Imagery .pdf:application/pdf},
}
@techreport{manharmohammedHAPSSAHolisticApproach2021,
address = {San Diego, CA, USA},
title = {{HAPSSA}: {Holistic} {Approach} to {PDF} {Malware} {Detection} {Using} {Signal} and {Statistical} {Analysis}},
shorttitle = {{HAPSSA}},
url = {https://ui.adsabs.harvard.edu/abs/2021arXiv211104703M},
abstract = {Malicious PDF documents present a serious threat to various security organizations that require modern threat intelligence platforms to effectively analyze and characterize the identity and behavior of PDF malware. State-of-the-art approaches use machine learning (ML) to learn features that characterize PDF malware. However, ML models are often susceptible to evasion attacks, in which an adversary obfuscates the malware code to avoid being detected by an Antivirus. In this paper, we derive a simple yet effective holistic approach to PDF malware detection that leverages signal and statistical analysis of malware binaries. This includes combining orthogonal feature space models from various static and dynamic malware detection methods to enable generalized robustness when faced with code obfuscations. Using a dataset of nearly 30,000 PDF files containing both malware and benign samples, we show that our holistic approach maintains a high detection rate (99.92\%) of PDF malware and even detects new malicious files created by simple methods that remove the obfuscation conducted by malware authors to hide their malware, which are undetected by most antiviruses.},
language = {English},
urldate = {2021-11-13},
institution = {IEEE},
author = {Manhar Mohammed, Tajuddin and Nataraj, Lakshmanan and Chikkagoudar, Satish and Chandrasekaran, Shivkumar and Manjunath, B. S.},
month = nov,
year = {2021},
note = {Publication Title: arXiv e-prints
ADS Bibcode: 2021arXiv211104703M
https://ieeexplore.ieee.org/document/9653097},
keywords = {Computer Science - Cryptography and Security, Computer Science - Machine Learning, Electrical Engineering and Systems Science - Signal Processing},
file = {Manhar Mohammed et al. - 2021 - HAPSSA Holistic Approach to PDF Malware Detection.pdf:/Users/tullsen/Zotero/storage/E7GHR3FI/Manhar Mohammed et al. - 2021 - HAPSSA Holistic Approach to PDF Malware Detection.pdf:application/pdf},
}
@article{kuribayashiStealthPDFDataHiding2021,
title = {{StealthPDF}: {Data} hiding method for {PDF} file with no visual degradation},
volume = {61},
issn = {2214-2126},
shorttitle = {{StealthPDF}},
url = {https://www.sciencedirect.com/science/article/pii/S2214212621001034},
doi = {10.1016/j.jisa.2021.102875},
abstract = {Conventional data hiding methods for PDF file insert a payload data by slightly modifying the position of characters in a document. Even if the changes are small, a certain degree of visual distortion is inevitably introduced to the PDF file. In this work, we propose a new data hiding method that splits the space value between characters. Specifically, a space value is split into two or more related values. Except for the first value which is reserved to store the corrective data, each of the related values encodes a segment of the payload data. When the PDF file is opened by a PDF viewer, the visual appearance is exactly the same as its original counterpart, i.e., complete quality preservation. To prevent direct observation of PDF file, access control is introduced by setting an owner password, which is a built-in function in the PDF standard. In the best case scenario, 38,160 bits can be hidden, while the observed file size increase is 12,776 Bytes.},
language = {English},
urldate = {2021-06-15},
journal = {Journal of Information Security and Applications},
author = {Kuribayashi, Minoru and Wong, KokSheik},
month = sep,
year = {2021},
keywords = {PDF, Authentication, Complete quality preservation, Data hiding, Space, StealthPDF},
pages = {102875},
}
@inproceedings{nicholasDocumentEngineeringIssues2021,
address = {New York, NY, USA},
series = {{DocEng} '21},
title = {Document engineering issues in malware analysis},
isbn = {978-1-4503-8596-1},
url = {https://doi.org/10.1145/3469096.3470950},
doi = {10.1145/3469096.3470950},
abstract = {We present an overview of the field of malware analysis with emphasis on issues related to document engineering. We will introduce the field with a discussion of the types of malware, including executable binaries, malicious PDFs, polymorphic malware, ransomware, and exploit kits. We will conclude with our view of important research questions in the field. This is an updated version of tutorials presented in previous years, with more information about newly-available tools.},
urldate = {2021-08-17},
booktitle = {Proceedings of the 21st {ACM} {Symposium} on {Document} {Engineering}},
publisher = {Association for Computing Machinery},
author = {Nicholas, Charles and Joyce, Robert J. and Simske, Steve},
month = aug,
year = {2021},
keywords = {disassembler, malware analysis, virtual machine},
pages = {1},
file = {Nicholas et al. - 2021 - Document engineering issues in malware analysis.pdf:/Users/tullsen/Zotero/storage/XMU6LNJP/Nicholas et al. - 2021 - Document engineering issues in malware analysis.pdf:application/pdf},
}
@phdthesis{diegoleonExtractingInformationPDF2021,
address = {Stockholm, Sweden},
type = {{DEGREE} {PROJECT} {COMPUTER} {SCIENCE} {AND} {ENGINEERING}, {SECOND} {CYCLE}, 30 {CREDITS}},
title = {Extracting {Information} {From} {PDF} {Invoices} {Using} {Deep} {Learning}},
url = {https://www.diva-portal.org/smash/get/diva2:1608779/FULLTEXT01.pdf},
abstract = {Manually extracting information from invoices can be time-consuming, especially
when managing large amounts of documents. Finding a way to automatically
extract this information could help businesses save resources. This thesis
investigates the information extraction of semi-structured data from PDF
invoices using deep learning methods and comparing them to a rule-based
model built as a baseline for comparison. More specifically, an object
detection approach based on the Faster R-CNN model is compared with a
Natural Language Processing (NLP) approach based on BERT. These models
were trained to extract 4 different fields, with a dataset consisting of 899 PDF
invoices. These models were tested on how well they extracted each field, and
their results were then compared. The NLP approach achieved the highest
overall F1 score of 0.911 and attained the highest score in all fields except
one. In second place came the rule-based approach, with an overall F1 score
of 0.830. In last place came the object detection approach with an overall
F1 score of 0.815. It is concluded that the NLP approach is best suited for
the task of information extraction from PDF invoices. Because of the small
dataset and Faster R-CNN requiring large amounts of data and long training,
the object detection approach did not reach its full potential. However, further
research is needed to prove if it could outperform the NLP approach with those
improvements.},
language = {English, Swedish},
school = {KTH ROYAL INSTITUTE OF TECHNOLOGY, SCHOOL OF ELECTRICAL ENGINEERING AND COMPUTER SCIENCE},
author = {{Diego Leon}},
month = aug,
year = {2021},
file = {Diego Leon - 2021 - Extracting Information From PDF Invoices Using Dee.pdf:/Users/tullsen/Zotero/storage/IPXBGJVV/Diego Leon - 2021 - Extracting Information From PDF Invoices Using Dee.pdf:application/pdf},
}
@article{giguetDanielFinTOC2021Taking2021,
title = {Daniel@{FinTOC}-2021: {Taking} {Advantage} of {Images} and {Vectorial} {Shapes} in {Native} {PDF} {Document} {Analysis}},
url = {https://aclanthology.org/2021.fnp-1.13.pdf},
abstract = {In this paper, we present our contribution to the FinTOC-2021 Shared Task “Financial Document Structure Extraction”. We participated in the tracks dedicated to English and French document processing. We get results for Title detection and TOC generation performance which demonstrates a good precision. We address the problem in a fairly unusual but ambitious way which consists in considering simultaneously text content, vectorial shapes and images embedded in the native PDF document, and to structure the document in its entirety.},
language = {English, French},
number = {The Third Financial Narrative Processing Workshop (FNP 2021)},
journal = {FinTOC-2021 Shared Task “Financial Document Structure Extraction”},
author = {Giguet, Emmanuel and Lejeune, Gaël},
month = sep,
year = {2021},
note = {http://wp.lancs.ac.uk/cfie/},
pages = {5},
file = {Giguet and Lejeune - Daniel@FinTOC-2021 Taking Advantage of Images and.pdf:/Users/tullsen/Zotero/storage/Y5WRI9NF/Giguet and Lejeune - Daniel@FinTOC-2021 Taking Advantage of Images and.pdf:application/pdf},
}
@inproceedings{guedesSupervisedLearningApproach2021,
address = {Cham},
series = {Lecture {Notes} in {Computer} {Science}},
title = {Supervised {Learning} {Approach} for {Section} {Title} {Detection} in {PDF} {Scientific} {Articles}},
volume = {13067},
isbn = {978-3-030-89817-5},
url = {https://link.springer.com/chapter/10.1007/978-3-030-89817-5_3},
doi = {10.1007/978-3-030-89817-5_3},
abstract = {The majority of scientific articles is available in Portable Document Format (PDF). Although PDF format has the advantage of preserving layout across platforms it does not maintain the original metadata structure, making it difficult further text processing. Despite different layouts, depending on the applied template, articles have a hierarchical structure and are divided into sections, which represent topics of specific subjects, such as methodology and results. Hence, section segmentation serves as an important step for a contextualized text processing of scientific articles. Therefore, this work applies binary classification, a supervised learning task, for section title detection in PDF scientific articles. To train the classifiers, a large dataset (more than 5 millions samples from 7,302 articles) was created through an automated feature extraction approach, comprised by 17 features, where 4 were introduced in this work. Training and testing were made for ten different classifiers for which the best F1 score reached 0.94. Finally, we evaluated our results against CERMINE, an open-source system that extracts metadata from scientific articles, having an absolute improvement in section detection of 0.19 in F1 score.},
language = {en},
booktitle = {Advances in {Computational} {Intelligence}},
publisher = {Springer International Publishing},
author = {Guedes, Gustavo Bartz and da Silva, Ana Estela Antunes},
editor = {Batyrshin, Ildar and Gelbukh, Alexander and Sidorov, Grigori},
month = oct,
year = {2021},
keywords = {Scientific article segmentation, Section title detection, Supervised learning, Text segmentation},
pages = {44--54},
}
@article{mikhailovPyTabbyDocreaderModule2021,
title = {{PyTabby}: a {Docreader}’s module for extracting text and tables from {PDF} with a text layer},
copyright = {Creative Commons License Attribution 4.0 International (CC BY 4.0)},
url = {http://ceur-ws.org/Vol-2984/paper15.pdf},
abstract = {This paper presents a complete solution for extraction of textual information and tables from PDF with a text layer. The presented solution consist of two parts: PyTabby is a tool for extracting text and tables from PDF with a complex background and layout, and Python wrapper module for Docreader tool. The PyTabby tool extracts text and tables from the low level representation of the PDF format. It enables employment of the additional information excluded in scanned documents and provides improvement of quality and performance compared with Optical Character Recognition (OCR) methods. The presented solution is incorporated into Docreader tool to parse PDF files with a text layer and is used as a part of the TALISMAN technology for social analytics.},
language = {English},
journal = {Information Technologies: Algorithms, Models, Systems (ITAMS)},
author = {Mikhailov, Andrey A and Shigarov, Alexey and Kozlov, Ilya S},
month = sep,
year = {2021},
pages = {7},
file = {Mikhailov et al. - PyTabby a Docreader’s module for extracting text .pdf:/Users/tullsen/Zotero/storage/Z8TGW93U/Mikhailov et al. - PyTabby a Docreader’s module for extracting text .pdf:application/pdf},
}
@misc{iccDocumentICC1A1999,
title = {Document {ICC}.{1A}:1999-04 {Addendum} 2 to {Specification} {ICC}.1:1998-09},
copyright = {Copyright International Color Consortium},
url = {http://color.org/icc_specs2.xalter},
language = {English},
publisher = {International Color Consortium},
author = {{ICC}},
month = apr,
year = {1999},
file = {1999 - Document ICC.1A1999-04 Addendum 2 to Specificatio.PDF:/Users/tullsen/Zotero/storage/WG64HJ9V/1999 - Document ICC.1A1999-04 Addendum 2 to Specificatio.PDF:application/pdf},
}
@misc{iccSpecificationICC1998091998,
title = {Specification {ICC}.1:1998-09 {File} {Format} for {Color} {Profiles}},
copyright = {Copyright International Color Consortium},
url = {http://color.org/icc_specs2.xalter},
language = {English},
publisher = {International Color Consortium},
author = {{ICC}},
month = sep,
year = {1998},
file = {1998 - Specification ICC.11998-09 File Format for Color .PDF:/Users/tullsen/Zotero/storage/PPB98SK6/1998 - Specification ICC.11998-09 File Format for Color .PDF:application/pdf},
}
@misc{iccSpecificationICC1997081997,
title = {Specification {ICC}.1:1997-08 ({Version} 3.4) {File} {Format} for {Color} {Profiles}},
copyright = {Copyright International Color Consortium},
url = {http://color.org/icc_specs2.xalter},
language = {English},
publisher = {International Color Consortium},
author = {{ICC}},
month = aug,
year = {1997},
file = {1997 - Specification ICC.11997-08 (Version 3.4) File For.pdf:/Users/tullsen/Zotero/storage/7GLPKB77/1997 - Specification ICC.11997-08 (Version 3.4) File For.pdf:application/pdf},
}
@misc{iccSpecificationICC1996111996,
title = {Specification {ICC}.1:1996-11 ({Version} 3.3) {File} {Format} for {Color} {Profiles}},
copyright = {Copyright International Color Consortium},
url = {http://color.org/icc_specs2.xalter},
language = {English},
publisher = {International Color Consortium},
author = {{ICC}},
month = nov,
year = {1996},
file = {1996 - Specification ICC.11996-11 (Version 3.3) File For.pdf:/Users/tullsen/Zotero/storage/6ASTM5LZ/1996 - Specification ICC.11996-11 (Version 3.3) File For.pdf:application/pdf},
}
@misc{iccSpecificationICC1995111995,
title = {Specification {ICC}.1:1995-11 ({Version} 3.2) {File} {Format} for {Color} {Profiles}},
copyright = {Copyright International Color Consortium},
url = {http://color.org/icc_specs2.xalter},
language = {English},
publisher = {International Color Consortium},
author = {{ICC}},
month = nov,
year = {1995},
file = {1995 - Specification ICC.11995-11 (Version 3.2) File For.pdf:/Users/tullsen/Zotero/storage/T47QG6MW/1995 - Specification ICC.11995-11 (Version 3.2) File For.pdf:application/pdf},
}
@misc{iccSpecificationICC1995051995,
title = {Specification {ICC}.1:1995-05 ({Version} 3.01) {File} {Format} for {Color} {Profiles}},
copyright = {Copyright International Color Consortium},
url = {http://color.org/icc_specs2.xalter},
language = {English},
publisher = {International Color Consortium},
author = {{ICC}},
month = may,
year = {1995},
file = {1995 - Specification ICC.11995-05 (Version 3.01) File Fo.pdf:/Users/tullsen/Zotero/storage/8KHNYDB2/1995 - Specification ICC.11995-05 (Version 3.01) File Fo.pdf:application/pdf},
}
@misc{iccSpecificationICC1994061994,
title = {Specification {ICC}.1:1994-06 ({Version} 3.0) {File} {Format} for {Color} {Profiles}},
copyright = {Copyright International Color Consortium},
url = {http://color.org/icc_specs2.xalter},
language = {English},
publisher = {International Color Consortium},
author = {{ICC}},
month = jun,
year = {1994},
file = {1994 - Specification ICC.11994-06 (Version 3.0) File For.pdf:/Users/tullsen/Zotero/storage/N7XE6RMV/1994 - Specification ICC.11994-06 (Version 3.0) File For.pdf:application/pdf},
}
@misc{iccICC2004102004,
title = {{ICC}.1:2004-10},
copyright = {Copyright International Color Consortium},
shorttitle = {{ICC}.1 v4.2.0},
url = {http://color.org/icc_specs2.xalter},
language = {English},
publisher = {International Color Consortium},
author = {{ICC}},
month = oct,
year = {2004},
file = {2004 - ICC.12004-10.pdf:/Users/tullsen/Zotero/storage/ATX3TA3M/2004 - ICC.12004-10.pdf:application/pdf},
}
@misc{iccPrivateICCTag2019,
title = {Private and {ICC} {Tag} and {CMM} {Registry}},
copyright = {Copyright International Color Consortium},
shorttitle = {Tag {Registry}},
url = {http://www.color.org/signatures2.xalter},
language = {English},
publisher = {International Color Consortium},
author = {{ICC}},
month = oct,
year = {2019},
file = {2019 - Private and ICC Tag and CMM Registry.pdf:/Users/tullsen/Zotero/storage/F6TRE5SD/2019 - Private and ICC Tag and CMM Registry.pdf:application/pdf},
}
@misc{iccSpecificationICC20192021,
title = {Specification {ICC}.2:2019 ({Profile} version 5.0.0.0) {Cumulative} {Errata} {List}},
url = {https://color.org/iccmax/ICC.2-2019_Cumulative_Errata_List_2021-09-09.pdf},
abstract = {Cumulative Errata List for iccMAX / ICC.2 / v5},
language = {English},
publisher = {International Color Consortium},
author = {{ICC}},
month = sep,
year = {2021},
file = {ICC - 2021 - Specification ICC.22019 (Profile version 5.0.0.0).pdf:/Users/tullsen/Zotero/storage/BMGNE69H/ICC - 2021 - Specification ICC.22019 (Profile version 5.0.0.0).pdf:application/pdf},
}
@misc{iccWhitePaper472018,
title = {White {Paper} 47: {The} value of {iccMAX}},
copyright = {Copyright International Color Consortium},
shorttitle = {White {Paper} 47},
abstract = {iccMAX is a color management interchange format that addresses use cases beyond those addressed by the ICC v4 (ISO 15076-1) color management profile format. ICC v4 is widely used today in graphic arts workflows. For most of these workflows, v4 is straightforward to use and uniformly implemented across a large number of different software applications from different vendors. ICC v4 has enabled users to get the same or very similar results when color managing files through multiple different workflows, especially for the graphic arts.
In other applications, however such as managing digital photographs or color managing packaging in store lighting conditions, v4 is missing some key features. iccMAX evolved from work within the ICC to extend the v4 profile format beyond the graphic arts. iccMAX workflows are intended to be backward-compatible with v4, which means that iccMAX-aware applications also have to be able to use v4 profiles.
This paper is addressed to end users of color management systems, and is intended to be used to decide when an iccMAX rather than ICC v4 is the appropriate choice.},
language = {English},
publisher = {International Color Consortium},
author = {{ICC}},
month = may,
year = {2018},
file = {2018 - White Paper 47 The value of iccMAX.pdf:/Users/tullsen/Zotero/storage/JGC2SYBN/2018 - White Paper 47 The value of iccMAX.pdf:application/pdf},
}
@misc{iccSpecificationICC20192019,
title = {Specification {ICC}.2:2019 ({Profile} version 5.0.0 - {iccMAX}) {Image} technology color management - {Extensions} to architecture, profile format and data structure [{REVISION} of {ICC}.2:2018]},
copyright = {Copyright International Color Consortium},
shorttitle = {{ICC}.2 v5.0.0},
language = {English},
publisher = {International Color Consortium},
author = {{ICC}},
year = {2019},
file = {2019 - Specification ICC.22019 (Profile version 5.0.0 - .pdf:/Users/tullsen/Zotero/storage/CXIDVQ7P/2019 - Specification ICC.22019 (Profile version 5.0.0 - .pdf:application/pdf},
}
@misc{ferdZUGFeRD2019,
title = {{ZUGFeRD} 2.0.1},
shorttitle = {{ZUGFeRD} 2.0.1},
url = {https://www.ferd-net.de/zugferd/zugferd-2.0/index.html},
abstract = {The German Forum on electronic Invoicing (FeRD) has developed a new release of uniform data format called ZUGFeRD 2.0 on 11th March 2019. It can be used for exchanging invoice data between enterprises, authorities and consumers. The format allows for an exchange of structural invoice data between the issuer and the recipient in a single PDF file without any necessary further steps of reading or processing the data.
ZUGFeRD 2.0 was developed in close coordination with the French standard Factur-X 1.0, is technically identical to it and thus also pursues the standardization objectives at the European level. The hybrid invoice format contains the structured invoice data in a PDF / A-3 file that forms the view component of the invoice. The structured invoice data can be read out and processed by the invoice recipient.
ZUGFeRD 2.0 meets the requirements of the EU Directive and the EU standard.},
language = {German, English},
publisher = {Forum for Electronic Invoicing Germany (FeRD)},
author = {{FeRD}},
month = mar,
year = {2019},
note = {Can be downloaded free of charge from above URL.},
keywords = {Invoicing, PDF/A},
}
@article{suriLostMigrationDocument2018,
title = {Lost in migration: document quality for batch conversion to {PDF}/{A}},
volume = {39},
issn = {0737-8831},
shorttitle = {Lost in migration},
url = {https://doi.org/10.1108/LHT-10-2017-0220},
doi = {10.1108/LHT-10-2017-0220},
abstract = {Purpose Changes in file format specifications challenge long-term preservation of digital documents. Digital archives thus often focus on specific file formats that are well suited for long-term preservation, such as the PDF/A format. Since only few customers submit PDF/A files, digital archives may consider converting submitted files to the PDF/A format. The paper aims to discuss these issues. Design/methodology/approach The authors evaluated three software tools for batch conversion of common file formats to PDF/A-1b: LuraTech PDF Compressor, Adobe Acrobat XI Pro and 3-HeightsTM Document Converter by PDF Tools. The test set consisted of 80 files, with 10 files each of the eight file types JPEG, MS PowerPoint, PDF, PNG, MS Word, MS Excel, MSG and “web page.” Findings Batch processing was sometimes hindered by stops that required manual interference. Depending on the software tool, three to four of these stops occurred during batch processing of the 80 test files. Furthermore, the conversion tools sometimes failed to produce output files even for supported file formats: three (Adobe Pro) up to seven (LuraTech and 3-HeightsTM) PDF/A-1b files were not produced. Since Adobe Pro does not convert e-mails, a total of 213 PDF/A-1b files were produced. The faithfulness of each conversion was investigated by comparing the visual appearance of the input document with that of the produced PDF/A-1b document on a computer screen. Meticulous visual inspection revealed that the conversion to PDF/A-1b impaired the information content in 24 of the converted 213 files (11 percent). These reproducibility errors included loss of links, loss of other document content (unreadable characters, missing text, document part missing), updated fields (reflecting time and folder of conversion), vector graphics issues and spelling errors. 
Originality/value These results indicate that large-scale batch conversions of heterogeneous files to PDF/A-1b cause complex issues that need to be addressed for each individual file. Even with considerable efforts, some information loss seems unavoidable if large numbers of files from heterogeneous sources are migrated to the PDF/A-1b format.},
number = {2},
urldate = {2021-09-27},
journal = {Library Hi Tech},
author = {Suri, Roland Erwin and El-Saad, Mohamed},
month = jan,
year = {2018},
note = {Publisher: Emerald Publishing Limited},
keywords = {Academic libraries, Archives, Conversion, Digital documents, Digital libraries, Digital preservation},
pages = {337--351},
file = {Snapshot:/Users/tullsen/Zotero/storage/2TRUASN7/html.html:text/html},
}
@misc{ferdZUGFeRD2020,
title = {{ZUGFeRD} 2.1.1},
copyright = {Apache 2.0},
shorttitle = {{ZUGFeRD} 2.1.1},
url = {https://www.ferd-net.de/standards/zugferd-2.1.1/zugferd-2.1.1.html},
abstract = {The German Forum on electronic Invoicing (FeRD) has developed a new release of uniform data format called ZUGFeRD 2.0 on 11th March 2019. It can be used for exchanging invoice data between enterprises, authorities and consumers. The format allows for an exchange of structural invoice data between the issuer and the recipient in a single PDF file without any necessary further steps of reading or processing the data.
ZUGFeRD 2.0 was developed in close coordination with the French standard Factur-X 1.0, is technically identical to it and thus also pursues the standardization objectives at the European level. The hybrid invoice format contains the structured invoice data in a PDF / A-3 file that forms the view component of the invoice. The structured invoice data can be read out and processed by the invoice recipient.
ZUGFeRD 2.0 meets the requirements of the EU Directive and the EU standard.},
language = {German, English},
publisher = {Forum for Electronic Invoicing Germany (FeRD)},
author = {{FeRD}},
month = jul,
year = {2020},
note = {Can be downloaded free of charge from above URL.},
keywords = {Invoicing, PDF/A},
file = {ZUGFeRD-2.1.1 - Vergleich_ZUGFeRD1_ZUGFeRD21.pdf:/Users/tullsen/Zotero/storage/ZYT7UWNJ/ZUGFeRD-2.1.1 - Vergleich_ZUGFeRD1_ZUGFeRD21.pdf:application/pdf;ZUGFeRD-2.1.1 - Specification_TA_ReferenceProfiles.pdf:/Users/tullsen/Zotero/storage/97E65XLZ/ZUGFeRD-2.1.1 - Specification_TA_ReferenceProfiles.pdf:application/pdf;ZUGFeRD-2.1.1 - Specification_TA_Part-B.pdf:/Users/tullsen/Zotero/storage/6V944WSS/ZUGFeRD-2.1.1 - Specification_TA_Part-B.pdf:application/pdf;ZUGFeRD-2.1.1 - Specification_TA_Part-A.pdf:/Users/tullsen/Zotero/storage/YPUW6UBR/ZUGFeRD-2.1.1 - Specification_TA_Part-A.pdf:application/pdf;ZUGFeRD-2.1.1 - Specification_Known-Issues.pdf:/Users/tullsen/Zotero/storage/DSEBMZXC/ZUGFeRD-2.1.1 - Specification_Known-Issues.pdf:application/pdf;ZUGFeRD-2.1.1 - Specification_TA.pdf:/Users/tullsen/Zotero/storage/7XK2HXAD/ZUGFeRD-2.1.1 - Specification_TA.pdf:application/pdf;EN16931 code lists values used from 2020-02-14.xlsx:/Users/tullsen/Zotero/storage/ZQ3N9HZM/EN16931 code lists values used from 2020-02-14.xlsx:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;ZUGFeRD-2.1.1 - Specification.pdf:/Users/tullsen/Zotero/storage/DFDQLTGT/ZUGFeRD-2.1.1 - Specification.pdf:application/pdf},
}
@misc{ferdOrderX2021,
title = {Order-{X} 1.0},
copyright = {Apache 2.0},
shorttitle = {Order-{X} 1.0},
url = {https://www.ferd-net.de/standards/download-packages/order-x-1.0.html},
abstract = {The German Forum on electronic Invoicing (FeRD) has developed a new release of uniform data format called Order-X. Order-X is the new hybrid format for the digitalized workflow of creating and processing orders. It is built on the same semantic data model as the hybrid format for electronic invoices, ZUGFeRD/Factur-X. The main advantage of such a hybrid format is it allows to replace traditional simple PDF or paper orders exchange by PDF orders with structured data for automated processing by companies which are willing or able to do so, especially SMEs. In addition, it is now possible to establish a seamless and collaborative process from order to invoice, in conjunction with ZUGFeRD/Factur-X, as both formats are based on the same XML structure.
Order-X is the hybrid equivalent to the electronic invoice standard ZUGFeRD/Factur-X, allowing to process purchase orders electronically. It is based on the sam international data model of the United Nations "UN/CEFACT Supply Chain Reference Data Model" (SCRDM) thus ensuring interoperability with ZUGFeRD/Factur-X.
In its hybrid version, Order-X embeds the structured information in a PDF/A-3 file which ist he visual representation of the order. The structured XML information can then be automatically processed by the receiver.},
language = {English, German},
publisher = {Forum for Electronic Invoicing Germany (FeRD)},
author = {{FeRD}},
month = apr,
year = {2021},
file = {01-2021 04 13 - FeRD-FNFE Cross Industry ORDER-X Process Specification V1.0.pdf:/Users/tullsen/Zotero/storage/BTACNBHA/01-2021 04 13 - FeRD-FNFE Cross Industry ORDER-X Process Specification V1.0.pdf:application/pdf;Electronic_Invoices_Practical_Guidelines_for_Companies.pdf:/Users/tullsen/Zotero/storage/IF7C2LLK/Electronic_Invoices_Practical_Guidelines_for_Companies.pdf:application/pdf},
}
@misc{aiimAIIMBP022008Best2008,
  title      = {{AIIM} {BP}-02-2008 {Best} {Practices} - {Implementation} guide for the {Portable} {Document} {Format} {Healthcare} ({PDF}/{H})},
  copyright  = {Copyright AIIM},
  shorttitle = {{PDF}/{H}},
  url        = {https://issuu.com/michaelejahn/docs/pdf-h_implementation_guide_2008/49},
  abstract   = {Portable Document Format (PDF) is a digital file format that provides a method for presenting information that is independent of the application software, hardware, and operating system used to create the information and of the output device used to display or print the information. The independent nature of PDF facilitates the process of creating, managing, securing, collecting, and exchanging digital content on diverse platforms and devices. As such, the use of PDF provides the basis for information portability and interoperability. The migration of multiple medical record types to a universal digital format would be enabled by implementation of an easily adopted document encapsulation practice. This practice would contain specifications for portability, interoperability, and security and would promote the exchange of healthcare information.
The Portable Document Format Healthcare, A Best Practices Guide (AIIM/ASTM BP-01-2008) is designed as an aide to help describe the technical aspects of such a practice. This companion Implementation Guide is meant to be an adjunct to the Best Practices Guide and provides sample implementation information.
The Portable Document Format Healthcare, A Best Practices Guide describes the practice of using PDF to facilitate a trusted means by which healthcare information is captured, exchanged, preserved, and protected among consumers and the rest of the healthcare system.
As an adjunct to the Best Practices Guide, this Implementation Guide is intended to serve as an example for implementation and does not preclude development or implementation of any other features or implementations of PDF Healthcare through additional usage models in support of storing, exchanging, sharing, or viewing of personal healthcare data.
This Implementation Guide recognizes that there is a continuum of implementation models ranging from large enterprise models, to independent software vendor models, small to medium sized healthcare provider offices, enterprise to physician, physician to enterprise, physician to physician, physician to other healthcare provider, patient to physician, patient to other healthcare provider, and more. This Implementation Guide cannot describe every use case but will provide a framework that can be replicated in a wide variety of instances.},
  language   = {English},
  publisher  = {AIIM},
  author     = {{AIIM}},
  year       = {2008},
  keywords   = {PDF/H},
}
@misc{aiimAIIMASTMBP0120082008,
title = {{AIIM}/{ASTM} {BP}-01-2008 {Portable} {Document} {Format} {Healthcare}: {A} {Best} {Practices} {Guide}},
copyright = {Copyright ASTM},
shorttitle = {{PDF}/{H}},
url = {https://www.astm.org/Standards/AIIMASTM.htm},
abstract = {This Portable Document Format Healthcare Best Practices Guide describes the features and functions of the proposed, voluntary, and industry-wide use of the Portable Document Format (PDF) for the healthcare industry. As such, the guide is intended to be used as a reference tool for defining PDF as an electronic container by which healthcare information can be captured, exchanged, preserved, and protected for consumers, care providers, and other stakeholders within the healthcare delivery system.
The guide does not describe a normative file format in the same manner as PDF/A. Instead the guide provides education on the use of PDF to support extensible Markup Language (XML) standards in the healthcare ecosystem and enable longer-term retention of PDF healthcare documents.
Note: This document is not Intended to address compliance with any applicable state and federal regulations that might apply to this information, including Public Law 104· 191 (1996), the Health Insurance Portability and Accountability Act (HIPAA).
The Implementation Gulde for PDF Healthcare is published as a separate document by AIIM and can be found at: http J/wvm.ai1m.org/pdfh/ig},
language = {English},
publisher = {AIIM},
author = {{AIIM}},
month = feb,
year = {2008},
note = {Specification can be purchased via ASTM webstore.},
keywords = {PDF/H},
file = {2008 - AIIMASTM BP-01-2008 Portable Document Format Heal.pdf:/Users/tullsen/Zotero/storage/6DEZS6WC/2008 - AIIMASTM BP-01-2008 Portable Document Format Heal.pdf:application/pdf},
}
@misc{hpPCLmPCLmS2013,
  title      = {{PCLm} / {PCLmS}},
  shorttitle = {{PCLm}},
  abstract   = {PCLm is an HP proprietary format for streaming mobile printing which is derived from PDF. PCLm was originally released by HP as part of e-Print and was later officially incorporated the WiFi Alliance WiFi Direct Print Services and promoted by Mopria as one of 3 PDLs for mobile printing (PCLm, PDF, PWG Raster).
The PCLm Specification is currently only available to WiFi Alliance Members. HP US patent application US 20130100486 A1 “Communication architectures for direct printing and scanning” discloses information on PCLm:
To create a streamable PDF document, a new protocol called Printer Control Language—Mobile (PCLm) has been created. PCLm has been developed using a subset of the PDF grammar. PCLm allows for consumption of the print files by all PDF 1.4 or later compliant devices and allows for both printing and viewing of the rasterized pages. PCLm may exclude a number of PDF grammar constructs including text, vector, images, patterns, transparency and blending instructions. The exclusion of these commands makes PCLm deterministic because commands that could be retroactively applied to data which has already been streamed have been eliminated. This allows printers to begin printing as soon as the PCLm data stream begins to be received. This minimizes the amount of memory and computation required by the printer and allows low end printers to effectively print large documents.
PCLm is device independent because PCLm supports standard and device independent imaging constructs. Image data is in contone space (8 bit gray and 24 bit RGB) and has not been altered in any device dependent way. PCLm is also minimalistic because it is designed to be lightweight and efficient. This allows low-end consuming devices with limited processing power and memory to consume PCLm files. However, the configurability of the language also enables high-end devices to improve throughput and performance.
As used in the specification and appended claims, the term “streamable” refers to print files whose data is not retroactively altered by subsequent data in the file. This allows the print files to be streamed to the printer and printing to begin prior to receiving the entire print file because portions of the print file that are received by the printer are not modified by subsequent data. Consequently, the printer can begin printing once a predetermined amount of data has been received. For example, after receiving a swath or page, the printing can commence because subsequent data will not retroactively alter the data already received. In some embodiments, the print files may be streamable in that each bit of data representing a portion of an image to be printed is not altered once it is received by the printer.
As used in the specification and appended claims, the term “deterministic” refers to files, protocols and techniques that produce print jobs (payload) of known size and complexity. This guarantees that the printer will always be able to print the job without causing a memory out or a performance issue. For example, a print file that uses retroactive commands is not deterministic because the complete print file must be received, stored and manipulated by the printer. Because print files have varying sizes (sometimes as large as hundreds of megabytes), there is no guarantee that the printer will have enough memory or processing power to print the job.
PCLm is viewable by all standard PDF viewers because it is constructed with a proper subset of PDF. Some PCLm language constructs are added to the grammar to facilitate viewing. These additional constructs can be ignored by consuming devices. PCLm documents are also archivable because they are constructed so that they can be stored and re-printed at later times.
Additionally, PCLm is predictable and deterministic in its rate of document consumption and page production. Some performance variability can be incurred by the size and complexity of the input page image, but this will not cause significant throughput degradation. PCLm formatted documents are streamable because all the content for imaging a page is delivered by the end of that page's description within the PCLm stream. Raster data contained within the page is delivered in a page-logical-top to page-logical-bottom, thus allowing trivial consumption by the consumable device. To enhance page streamability in low memory devices, PCLm can support segmentation of individual pages into strips or swaths. The height of the strips will be device dependent and discovered during the IPP device capabilities query. This process may reduce the amount of memory required to buffer the raster page.
The printer status, job control, and job transmission are performed using Internet Printing Protocol (IPP). IPP provides a standard network protocol for remote printing and managing print jobs, media size, resolution, etc. IPP is implemented using Hypertext Transfer Protocol (HTTP) and inherits all of the HTTP streaming and security features, including encryption protocols such as Transport Layer Security (TLS) and HTTPS. IPP allows clients to query a printer's capabilities, submit print jobs, query the status of a printer, query the status of print jobs, cancel previously submitted jobs, and other actions.
Some PCLm devices have limited resources and thus cannot perform all necessary raster and page-order transformations to support duplex printing. Therefore, PCLm devices may require that the client develop the raster for the backside of duplex jobs so that it is oriented (flipped and or mirrored) correctly for the consuming device and possibly reorder the pages. The client computing device can discover the requirements of the consuming device by the IPP attribute backside-orientation and backside-scan-direction. PCLm can support a number of scanline orientations for backside duplex pages including: bottom-to-top or top-to-bottom, right-to-left or left-to-right, and frontside-first or backside-first. These scan line orientations are noted in the file and may not adversely affect the viewing of the file.
Other websites also describe “PCLm/PCLmS” as streaming mobile formats. For example http://en.wikipedia.org/wiki/Talk\%3APrinter\_Command\_Language:
What is unique about PCLm/PCLmS is that it contains Job Ticketing metadata which is very important in future mobile printing applications. So, you will start seeing many more printers that will list support for PCLm/PCLmS format. The other format you will start seeing is PWG format, which also includes job ticketing information. Both of these raster formats will be used by mobile devices to minimize the code required to generate a print stream that could be printed to virtually any printer connected to the Cloud or easily viewed on any supporting device. There is very little information about PCLm/PCLmS format. But, PWG is very well documented and its positioning is explained at the W3 Printer Work Group website: https://www.pwg.org/. PageTech's PCLReader, PCLWorks and PCLTool SDK products all have the ability to view/transform both the PCLm/PCLmS and PWG formats including the job ticketing metadata. They can also convert PCL with whatever job ticketing data is available in PCL or PJL format into PCLm/PCLmS and PWG formats. Job ticketing information (copy count, duplexing, stapling, color, etc.) is used to determine which printers are available that can print your file.},
  language   = {English},
  publisher  = {Hewlett Packard},
  author     = {{HP}},
  year       = {2013},
  file       = {UPnP_File_Transfer_Service_Technical_Specification_v1.1.pdf:/Users/tullsen/Zotero/storage/KMFCJWSV/UPnP_File_Transfer_Service_Technical_Specification_v1.1.pdf:application/pdf;Wi-Fi_Peer-to-Peer_Services_Print_Technical_Specification_v1.1.pdf:/Users/tullsen/Zotero/storage/3D5CDALD/Wi-Fi_Peer-to-Peer_Services_Print_Technical_Specification_v1.1.pdf:application/pdf;Wi-Fi_Peer-to-Peer_Services_Technical_Specification_v1.2.pdf:/Users/tullsen/Zotero/storage/JJMV64A2/Wi-Fi_Peer-to-Peer_Services_Technical_Specification_v1.2.pdf:application/pdf},
}
@misc{ferdZUGFeRD2014,
title = {{ZUGFeRD} 1.0},
shorttitle = {{ZUGFeRD} 1.0},
url = {https://www.ferd-net.de/zugferd/zugferd-1.0/index.html},
abstract = {ZUGFeRD 1.0 (June 2014) is a German standard based on PDF/A-3 that allows the blind exchange of invoices between supplier and payer without any requirements for prior arrangements. ZUGFeRD invoices can be deployed universally and are not limited to specific industry sectors or company sizes (unlike EDI). Private enterprises as well as public administration can efficiently organize their invoice processing with ZUGFeRD. The standard has been created by a working group which comprises members from the public administration, three German federal ministries, industry associations in the financial, tax and software sectors, and other organizations.
ZUGFeRD invoices carry both a human-readable representation (rendering) of the invoice as well as a structured machine-readable XML representation based on “Core Cross Industry Invoice” (CII) developed by UN/CEFACT. The CII provides a large framework with more than 2000 elements. The framework is modelled around business processes and relationships. The human-readable rendering is encoded as one or more PDF pages according to the PDF/A standard. In order to associate both invoice renderings with each other, ZUGFeRD leverages an important feature of PDF/A-3 (ISO 19005-3) which allows embedded attachments of arbitrary types into the PDF/A document. The XML invoice data is embedded in the PDF document as an attachment according to PDF/A-3 “Associated File” feature. In other words, ZUGFeRD invoices contain two separate renderings of the invoice where PDF/A-3 serves both as one of the renderings as well as the container for the XML CII rendering.
See http://www.pdflib.com/knowledge-base/pdfa/zugferd-invoices/},
language = {German, English},
publisher = {Forum for Electronic Invoicing Germany (FeRD)},
author = {{FeRD}},
month = jun,
year = {2014},
note = {Can be downloaded free of charge from above URL.},
keywords = {Invoicing, PDF/A},
}
@misc{vdaVDA49532Drawingfree2015,
  title      = {{VDA} 4953-2 {Drawing}-free {Product} {Documentation}},
  shorttitle = {{VDA} 4953-2},
  url        = {https://www.vda.de/en/services/Publications/drawing-free-product-documentation.html},
  abstract   = {The first part of VDA Recommendation 4953 described working with simplified drawings combined with 3D models and a master data sheet. Part 2 of the Recommendation describes the means available to the automotive industry for producing product documentation within a drawing-free process (DFP)
Available languages: German, English},
  language   = {English},
  publisher  = {Verband der Automobilindustrie (VDA)},
  author     = {{VDA}},
  month      = mar,
  year       = {2015},
  keywords   = {PDF/A},
  file       = {2015 - VDA 4953-2 Drawing-free Product Documentation.pdf:/Users/tullsen/Zotero/storage/HTKISTUY/2015 - VDA 4953-2 Drawing-free Product Documentation.pdf:application/pdf},
}
@article{zhangjiandongExtractingPDFTables2021,
title = {Extracting {PDF} {Tables} {Based} on {Word} {Vectors}},
volume = {5},
issn = {2096-3467},
url = {http://manu44.magtech.com.cn/Jwk_infotech_wk3/EN/abstract/abstract5140.shtml},
doi = {10.11925/infotech.2096-3467.2021.0164},
language = {Chinese},
number = {8},
urldate = {2021-09-26},
journal = {Data Analysis and Knowledge Discovery},
author = {Zhang, Jiandong and Chen, Shiji},
month = sep,
year = {2021},
pages = {34--44},
file = {Zhang Jiandong and Zhang Jiandong - 2021 - Extracting PDF Tables Based on Word Vectors.pdf:/Users/tullsen/Zotero/storage/FQ6AHH9E/Zhang Jiandong and Zhang Jiandong - 2021 - Extracting PDF Tables Based on Word Vectors.pdf:application/pdf},
}
@article{damoreTraitorProofPDFWatermarking2021,
title = {Traitor-{Proof} {PDF} {Watermarking}},
url = {http://arxiv.org/abs/2109.09712},
abstract = {This paper presents a traitor-tracing technique based on the watermarking of digital documents (pdf files in particular). The watermarking algorithm uses a chain of three separate techniques that work in synergy. The embedded payload can withstand a wide range of attacks and cannot be removed without invalidating the credibility of the document.},
language = {English},
urldate = {2021-09-26},
journal = {arXiv:2109.09712 [cs]},
author = {d'Amore, Fabrizio and Serpi, Alessandro},
month = sep,
year = {2021},
note = {arXiv: 2109.09712},
eprint = {2109.09712},
archiveprefix = {arXiv},
primaryclass = {cs.CR},
keywords = {Computer Science - Cryptography and Security},
annote = {Comment: 23 pages LNCS formatted, 3 figures. submitted to workshop},
file = {d'Amore and Serpi - 2021 - Traitor-Proof PDF Watermarking.pdf:/Users/tullsen/Zotero/storage/VYIDKVZR/d'Amore and Serpi - 2021 - Traitor-Proof PDF Watermarking.pdf:application/pdf},
}
@article{fayyazAccessibilityTablesPDF2021,
  title    = {Accessibility of {Tables} in {PDF} {Documents}},
  volume   = {40},
  issn     = {2163-5226, 0730-9295},
  url      = {https://ejournals.bc.edu/index.php/ital/article/view/12325},
  doi      = {10.6017/ital.v40i3.12325},
  abstract = {People access and share information over the web and in other digital environments, including digital libraries, in the form of documents such as books, articles, technical reports, etc. These documents are in a variety of formats, of which the Portable Document Format (PDF) is most widely used because of its emphasis on preserving the layout of the original material. The retrieval of relevant material from these derivative documents is challenging for information retrieval (IR) because the rich semantic structure of these documents is lost. The retrieval of important units such as images, figures, algorithms, mathematical formulas, and tables becomes a challenge. Among these elements, tables are particularly important because they can add value to the resource description, discovery, and accessibility of documents not only on the web but also in libraries if they are made retrievable and presentable to readers. Sighted users comprehend tables for sensemaking using visual cues, but blind and visually impaired users must rely on assistive technologies, including textto-speech and screen readers, to comprehend tables. However, these technologies do not pay sufficient attention to tables in order to effectively present tables to visually impaired individuals. Therefore, ways must be found to make tables in PDF documents not only retrievable but also comprehensible. Before developing such solutions, it is necessary to review the available assistive technologies, tools, and frameworks for their capabilities, strengths, and limitations from the comprehension perspective of blind and visually impaired people, along with suitable environments like digital libraries. We found no such review article that critically and analytically presents and evaluates these technologies. 
To fill this gap in the literature, this review paper reports on the current state of the accessibility of PDF documents, digital libraries, assistive technologies, tools, and frameworks that make PDF tables comprehensible and accessible to blind and visually impaired people. The study findings have implications for libraries, information sciences, and information retrieval.},
  language = {English},
  number   = {3},
  urldate  = {2021-09-26},
  journal  = {Information Technology and Libraries},
  author   = {Fayyaz, Nosheen and Khusro, Shah and Ullah, Shakir},
  month    = sep,
  year     = {2021},
  file     = {Fayyaz et al. - 2021 - Accessibility of Tables in PDF Documents.pdf:/Users/tullsen/Zotero/storage/C7NBM4YN/Fayyaz et al. - 2021 - Accessibility of Tables in PDF Documents.pdf:application/pdf},
}
@article{ullrichRealworldStringComparison2021,
title = {Real-world {String} {Comparison}: {How} to handle {Unicode} sequences correctly},
volume = {19},
issn = {1542-7730},
shorttitle = {Real-world {String} {Comparison}},
url = {https://doi.org/10.1145/3475965.3478522},
doi = {10.1145/3475965.3478522},
abstract = {In many languages a string comparison is a pitfall for beginners. With any Unicode string as input, a comparison often causes problems even for advanced users. The semantic equivalence of different characters in Unicode requires a normalization of the strings before comparing them. This article shows how to handle Unicode sequences correctly. The comparison of two strings for equality often raises questions concerning the difference between comparison by value, comparison of object references, strict equality, and loose equality. The most important aspect is semantic equivalence.},
language = {English},
number = {3},
urldate = {2021-09-14},
journal = {Queue},
author = {Ullrich, Torsten},
month = jun,
year = {2021},
note = {https://queue.acm.org/detail.cfm?id=3478522},
pages = {50:107--50:116},
file = {Ullrich - 2021 - Real-world String Comparison How to handle Unicod.pdf:/Users/tullsen/Zotero/storage/H8XUDJC5/Ullrich - 2021 - Real-world String Comparison How to handle Unicod.pdf:application/pdf},
}
@misc{j.reynaPotential360Degree2021,
title = {The {Potential} of 360 {Degree} {Videos} - {PDF} {Flipbook}},
url = {https://abox.pub/the-potential-of-360-degree-videos-pdf-flipbook.html},
abstract = {Cutting-edge video technologies have the potential to impact teaching, learning and research by providing more efficient, flexible and immerse experiences. In the early 90s, Apple developed QuickTime Virtual Reality (QTVR), and it can be considered an inspiration for 360-degree videos.
QuickTime VR technology used a series of pictures and stitched them together cylindrically (images wrapped around the viewer) using a QuickTime movie file. Users were able to scroll up and down, right and left, zoom in and out and even click links that contained audio or pop-up windows. In the late 90s, applications such as PanoViewer were developed using Flash that has similar functionality. With the mobile phone (2007) and tablet revolution (2010), these applications became redundant, and mobile applications started to offer VR experiences. Regrettably; it never has a massive uptake for education nor the general public. Twenty years later, the 360-degree video cameras were introduced, and YouTube support for 360-degree videos started (2015). Currently, there are more than twenty 360-degree video camera brands on the market. The growth of action cameras and applications may inspire this trend. This paper covers the technical side of 360-degree videos and discusses their potential application for teaching, learning and research.},
language = {English},
urldate = {2021-09-14},
journal = {abox.pub},
author = {Reyna, J.},
month = apr,
year = {2021},
note = {Lecture in Higher Education, Learning Design, Digital Media for Learning Scholar, Faculty of Science, University of Technology Sydney (AUSTRALIA)},
file = {Snapshot:/Users/tullsen/Zotero/storage/UWGMBVZS/the-potential-of-360-degree-videos-pdf-flipbook.html:text/html},
}
@inproceedings{nayanBanglaPDFSpeaker2021,
title = {Bangla {PDF} {Speaker}: {A} {Complete} {Computer} {Application} to {Convert} {Bangla} {PDF} to {Speech}},
shorttitle = {Bangla {PDF} {Speaker}},
doi = {10.1109/ACMI53878.2021.9528221},
abstract = {In this paper, a complete computer application is presented that can convert Bangla PDF to Bangla Speech. According to the proposed technique, images are extracted from PDF and then after processing the images, they are sent to OCR engine to extract text. Extracted text are then normalized and sent to text to speech (TTS) engine to generate speech. Image processing is a key component of the developed application as it increases the efficiency of OCR engine to a great extent. We propose a novel threshold selection method that is able to detect type of noise in the extracted image and select threshold accordingly for binary transformation. Thus it solves the problem of selecting appropriate threshold of different images and it increases the overall accuracy and efficiency of the application. Another feature that has improved the performance of introduced computer application is text normalization. Normalization of the extracted text from the OCR engine makes the text more accurate to pronounce by the TTS engine depending on the context. Finally, we present experimental results that show 80.804\% accuracy on text extraction from the PDF file and 3.92 score (out of 5) on the generated speech by human evaluation.},
booktitle = {2021 {International} {Conference} on {Automation}, {Control} and {Mechatronics} for {Industry} 4.0 ({ACMI})},
author = {Nayan, Md. Mizanur Rahaman and Haque, Mohammad Ariful},
month = jul,
year = {2021},
keywords = {Autonomous binarization threshold selection, Computer applications, Feature extraction, image processing, Mechatronics, Neural networks, Optical character recognition software, PDF to speech, Process control, text extraction, text normalization, text to speech, Training},
pages = {1--5},
file = {IEEE Xplore Abstract Record:/Users/tullsen/Zotero/storage/V8A2KTFQ/9528221.html:text/html},
}
@inproceedings{mishraExtractionTheoremsProofs2021,
address = {New York, NY, USA},
series = {{DocEng} '21},
title = {Towards extraction of theorems and proofs in scholarly articles},
isbn = {978-1-4503-8596-1},
url = {https://doi.org/10.1145/3469096.3475059},
doi = {10.1145/3469096.3475059},
abstract = {Scholarly articles in mathematical fields often feature mathematical statements (theorems, propositions, etc.) and their proofs. In this paper, we present preliminary work for extracting such information from PDF documents, with several types of approaches: vision (using YOLO), natural language (with transformers), and styling information (with linear conditional random fields). Our main task is to identify which parts of the paper to label as theorem-like environments and proofs. We rely on a dataset collected from arXiv, with LaTeX sources of research articles used to train the models.},
urldate = {2021-08-17},
booktitle = {Proceedings of the 21st {ACM} {Symposium} on {Document} {Engineering}},
publisher = {Association for Computing Machinery},
author = {Mishra, Shrey and Pluvinage, Lucas and Senellart, Pierre},
month = aug,
year = {2021},
keywords = {information extraction, proofs, scholarly articles, theorems},
pages = {1--4},
file = {Mishra et al. - 2021 - Towards extraction of theorems and proofs in schol.pdf:/Users/tullsen/Zotero/storage/Q75EXHAT/Mishra et al. - 2021 - Towards extraction of theorems and proofs in schol.pdf:application/pdf},
}
@inproceedings{pradhanPDFTextSentiment2021,
  address   = {Singapore},
  series    = {Advances in {Intelligent} {Systems} and {Computing}},
  title     = {{PDF} {Text} {Sentiment} {Analysis}},
  isbn      = {9789811625947},
  doi       = {10.1007/978-981-16-2594-7_55},
  abstract  = {Nowadays the internet has become a great source in terms of unstructured data. In the sentiment inspection, unprocessed text is operated, and it has brought different issues in computer processing. To avoid such issues, various steps and tactics are done. The paper gives an insight into the ground of sentiment inspection targeting today’s analysis works—lexicon-based work, context less categorization, and deep analysis. Sentiment mining, a main newbie subcategory in inspection, is discussed in this project. The main objective of the project is to describe a brief introduction to this emerging issue and to represent complete research of all major survey problems and the present increment in the ground. As a proof of that, this project requires greater than 400 links from all important journals. However the ground works with the natural language text, which is generally counted in the unprocessed data, this project has done a structured line or option in describing the difficulty with the objective of linking the unprocessed and processed ground and doing qualitative and quantitative analysis of emotions. It is major and important for practical applications.},
  language  = {English},
  booktitle = {International {Conference} on {Innovative} {Computing} and {Communications}},
  publisher = {Springer},
  author    = {Pradhan, Rahul and Gangwar, Kushagra and Dubey, Ishika},
  editor    = {Khanna, Ashish and Gupta, Deepak and Bhattacharyya, Siddhartha and Hassanien, Aboul Ella and Anand, Sameer and Jaiswal, Ajay},
  month     = aug,
  year      = {2021},
  keywords  = {PDF, Multilingual, Sentiment analysis},
  pages     = {679--690},
}
@article{baeLearn2EvadeLearningbasedGenerative2021,
title = {{Learn2Evade}: {Learning}-based {Generative} {Model} for {Evading} {PDF} {Malware} {Classifiers}},
issn = {2691-4581},
shorttitle = {{Learn2Evade}},
url = {https://ieeexplore.ieee.org/abstract/document/9512394},
doi = {10.1109/TAI.2021.3103139},
abstract = {Recent research has shown that a small perturbation to an input may forcibly change the prediction of a machine learning (ML) model. Such variants are commonly referred to as adversarial examples. Early studies have focused mostly on ML models for image processing and expanded to other applications, including those for malware classification. In this paper, we focus on the problem of finding adversarial examples against ML-based PDF malware classifiers. We deem that our problem is more challenging than those against ML models for image processing because of the highly complex data structure of PDF and of an additional constraint that the generated PDF should exhibit malicious behavior. To resolve our problem, we propose a variant of generative adversarial networks (GANs) that generate evasive variant PDF malware (without any crash), which can be classified as benign by various existing classifiers yet maintaining the original malicious behavior. Our model exploits the target classifier as the second discriminator to rapidly generate an evasive variant PDF with our new feature selection process that includes unique features extracted from malicious PDF files. We evaluate our technique against three representative PDF malware classifiers (Hidost13, Hidost16, and PDFrate-v2) and further examine its effectiveness with AntiVirus engines from VirusTotal. To the best of our knowledge, our work is the first to analyze the performance against the commercial AntiVirus engines. Our model finds, with great speed, evasive variants for all selected seeds against state-of-the-art PDF malware classifiers and raises a serious security concern in the presence of adversaries.},
journal = {IEEE Transactions on Artificial Intelligence},
author = {Bae, Ho and Lee, Younghan and Kim, Yohan and Hwang, Uiwon and Yoon, Sungroh and Paek, Yunheung},
month = aug,
year = {2021},
note = {Conference Name: IEEE Transactions on Artificial Intelligence},
keywords = {Portable document format, Feature extraction, Training, Adversarial Examples, Artificial intelligence, Detectors, Evading PDF Classifiers, Generative Adversarial Networks, Malware, PDF Malware, Perturbation methods},
pages = {1--1},
file = {Bae et al. - 2021 - Learn2Evade Learning-based Generative Model for E.pdf:/Users/tullsen/Zotero/storage/6HB3GHHC/Bae et al. - 2021 - Learn2Evade Learning-based Generative Model for E.pdf:application/pdf},
}
@article{evansUsePDFDigital2014,
title = {The {Use} of {PDF}/{A} in {Digital} {Archives}: {A} {Case} {Study} from {Archaeology}},
volume = {9},
copyright = {Copyright (c)},
issn = {1746-8256},
shorttitle = {The {Use} of {PDF}/{A} in {Digital} {Archives}},
url = {http://www.ijdc.net/article/view/9.2.123},
doi = {10.2218/ijdc.v9i2.267},
abstract = {In recent years the Portable Document Format (PDF) has become a ubiquitous format in the exchange of documents; in 2005 the PDF/A profile was defined in order to meet long term accessibility needs, and has accordingly come to be regarded as a long-term archiving strategy for PDF files. In the field of archaeology, a growing number of PDF files – containing the detailed results of fieldwork and research – are beginning to be deposited with digital archives such as the Archaeology Data Service (ADS). In the ADS’ experience, the use of PDF/A has had benefits as well as drawbacks: the majority of PDF reports are now in a standard format better suited to longer-term access, however migrating to PDF/A and managing and ensuring reuse of these files is intensive, and fraught with potential pitfalls. Of these, perhaps the most serious has been an unreliability in PDF/A conformance by the wide range of tools and software now available. There are also practical and more theoretical implications for reuse which, as our discipline of archaeology alongside so many others rapidly becomes digitized, presents us with a large corpus of ‘data’ that is human readable, but may not be amenable to machine-based technologies such as NLP. It may be argued that these factors effectively undermine some of the perceived cost benefit of moving from paper to digital, as well as the longer-term sustainability of PDF/A within digital archives.},
language = {en},
number = {2},
urldate = {2021-08-13},
journal = {International Journal of Digital Curation},
author = {Evans, Tim N. L. and Moore, Ray H.},
month = oct,
year = {2014},
note = {Number: 2},
keywords = {curation, DCC, digital curation, digital preservation, IJDC, International Journal of Digital Curation, preservation},
pages = {123--138},
file = {Evans and Moore - 2014 - The Use of PDFA in Digital Archives A Case Study.pdf:/Users/tullsen/Zotero/storage/ZKVTCXGH/Evans and Moore - 2014 - The Use of PDFA in Digital Archives A Case Study.pdf:application/pdf},
}
@incollection{mccargarNewspapersDataFormats2011,
title = {Newspapers, data formats, and acronym stew: {Preservation} and distribution of born-digital newspapers using {METS}/{ALTO}, {NITF}, and {PDF}-{A}},
isbn = {978-3-11-025531-7},
shorttitle = {Newspapers, data formats, and acronym stew},
url = {https://www.degruyter.com/document/doi/10.1515/9783110255317.115/html},
doi = {10.1515/9783110255317.115},
abstract = {Newspapers, data formats, and acronym stew: Preservation and distribution of born-digital newspapers using METS/ALTO, NITF, and PDF-A was published in Newspapers on page 115.},
language = {en},
urldate = {2021-08-13},
booktitle = {Newspapers},
publisher = {De Gruyter Saur},
author = {McCargar, Victoria and Nadal, Jacob and Snyder, Henry and Vanek, Andrea and Zarndt, Frederick},
month = may,
year = {2011},
pages = {115--124},
file = {Snapshot:/Users/tullsen/Zotero/storage/3DZD2M5F/html.html:text/html},
}
@article{sullivanArchivalRecordsManagement2006,
title = {An archival/records management perspective on {PDF}/{A}},
volume = {16},
issn = {0956-5698},
url = {https://doi.org/10.1108/09565690610654783},
doi = {10.1108/09565690610654783},
abstract = {Purpose – This article sets out to explain the purpose of PDF/A, how it addresses archival and records management concerns, how PDF/A was designed to have “desirable properties of a long‐term preservation format”, and the future of PDF/A. Design/methodology/approach – The contents of this article are based on the author's knowledge and experience of the subject. Findings – It is emphasized that PDF/A must be implemented in conjunction with policies and procedures, including quality assurance procedures to ensure acceptable replication of source material. Originality/value – This article will be of interest to anyone working with PDF files. Work has already begun on PDF/A Part 2 which will be based on PDF 1.6. Application notes and a listing of frequently asked questions will be made publicly available to assist developers of PDF/A applications to better understand the requirements of the file format and provide implementation guidance.},
number = {1},
urldate = {2021-08-13},