diff --git a/manuscript/2024_12_attribution_broad_retreat/beamerthemegemini.sty b/manuscript/2024_12_attribution_broad_retreat/beamerthemegemini.sty new file mode 100644 index 0000000..4a69ec5 --- /dev/null +++ b/manuscript/2024_12_attribution_broad_retreat/beamerthemegemini.sty @@ -0,0 +1,257 @@ +% Gemini theme +% https://github.com/anishathalye/gemini + +% ==================== +% Dependencies +% ==================== + +\RequirePackage{exscale} +\RequirePackage{ragged2e} +\RequirePackage{changepage} +\RequirePackage{fontspec} +\RequirePackage{calc} + +% ==================== +% Fonts +% ==================== + +\newfontfamily\Raleway[Ligatures=TeX]{Raleway} +\newfontfamily\Lato[Ligatures=TeX]{Lato} + +\usefonttheme{professionalfonts} + +\setsansfont{Lato}[ + UprightFont=*-Light, + ItalicFont=*-LightItalic, + BoldFont=*-Regular, + BoldItalicFont=*-Italic +] + +\setbeamerfont{headline}{family=\Raleway} +\setbeamerfont{headline title}{size=\Huge,series=\bfseries} +\setbeamerfont{headline author}{size=\Large} +\setbeamerfont{headline institute}{size=\normalsize} +\setbeamerfont{block title}{family=\Raleway,size=\large,series=\bfseries} +\setbeamerfont{heading}{family=\Lato,series=\bfseries} +\setbeamerfont{caption}{size=\small} +\setbeamerfont{footline}{family=\Raleway,size=\normalsize} + +% ==================== +% Macros +% ==================== + +\newcommand{\samelineand}{\qquad} + +% ==================== +% Elements +% ==================== + +% List +\def\@listi{\leftmargin\leftmargini +\topsep 1ex % spacing before +\parsep 0\p@ \@plus\p@ +\itemsep 0.5ex} % spacing between + +% Itemize + +\setbeamertemplate{itemize item}{\raise0.5ex \hbox{\vrule width 0.5ex height 0.5ex}} +\setbeamertemplate{itemize subitem}{\raise0.3ex \hbox{\vrule width 0.5ex height 0.5ex}} +\setbeamertemplate{itemize subsubitem}{\raise0.2ex \hbox{\vrule width 0.5ex height 0.5ex}} + +% Enumerate + +\setbeamertemplate{enumerate item}{\insertenumlabel.} +\setbeamertemplate{enumerate subitem}{\insertsubenumlabel.} +\setbeamertemplate{enumerate subsubitem}{\insertsubsubenumlabel.} + +% Equation +\setlength\belowdisplayshortskip{2ex} + +% Caption +\setbeamertemplate{caption}[numbered] +\setbeamertemplate{caption label separator}[period] +\setlength{\abovecaptionskip}{2ex} +\setlength{\belowcaptionskip}{1ex} + +% Bibliography +\setbeamertemplate{bibliography item}[text] + +% Navigation +\beamertemplatenavigationsymbolsempty + +% ==================== +% Components +% ==================== + +% Heading +\newcommand\heading[1] +{% + \par\bigskip + {\usebeamerfont{heading}\usebeamercolor[fg]{heading}#1}\par\smallskip +} + +% logo +\newlength{\logoleftwidth} +\setlength{\logoleftwidth}{0cm} +\newlength{\logorightwidth} +\setlength{\logorightwidth}{0cm} +\newlength{\maxlogowidth} % space on both sides set to maxlogowidth to keep title centered +\setlength{\maxlogowidth}{0cm} + +\newcommand{\logoright}[1]{ + \newcommand{\insertlogoright}{#1} + \settowidth{\logorightwidth}{\insertlogoright} + \addtolength{\logorightwidth}{10ex} + \setlength{\maxlogowidth}{\maxof{\logoleftwidth}{\logorightwidth}} +} +\newcommand{\logoleft}[1]{ + \newcommand{\insertlogoleft}{#1} + \settowidth{\logoleftwidth}{\insertlogoleft} + \addtolength{\logoleftwidth}{10ex} + \setlength{\maxlogowidth}{\maxof{\logoleftwidth}{\logorightwidth}} +} + +% Headline +\setbeamertemplate{headline} +{ + \begin{beamercolorbox}{headline} + \begin{columns} + \begin{column}{\maxlogowidth} + \vskip5ex + \ifdefined\insertlogoleft + \vspace*{\fill} + \hspace{10ex} + \raggedright + 
\insertlogoleft + \vspace*{\fill} + \else\fi + \end{column} + \begin{column}{\dimexpr\paperwidth-\maxlogowidth-\maxlogowidth-60ex} % Subtracting distance at the end moves the right logo to the left + \usebeamerfont{headline} + \vskip3ex + \centering + \ifx \inserttitle \empty \else + {\usebeamerfont{headline title}\usebeamercolor[fg]{headline title}\inserttitle\\[0.5ex]} + \fi + \ifx \beamer@shortauthor \empty \else + {\usebeamerfont{headline author}\usebeamercolor[fg]{headline author}\insertauthor\\[1ex]} + \fi + \ifx \insertinstitute \empty \else + {\usebeamerfont{headline institute}\usebeamercolor[fg]{headline institute}\insertinstitute\\[1ex]} + \fi + \end{column} + \begin{column}{\maxlogowidth} + \vskip5ex + \ifdefined\insertlogoright + \vspace*{\fill} + \raggedleft + \insertlogoright + \hspace{10ex} + \vspace*{\fill} + \else\fi + \end{column} + \end{columns} + \vspace{5ex} + \ifbeamercolorempty[bg]{headline rule}{}{ + \begin{beamercolorbox}[wd=\paperwidth,colsep=0.5ex]{headline rule}\end{beamercolorbox} + } + \end{beamercolorbox} +} + +% Block +\setbeamertemplate{block begin} +{ + \begin{beamercolorbox}[colsep*=0ex,dp=2ex,center]{block title} + \vskip0pt + \usebeamerfont{block title}\insertblocktitle + \vskip-1.25ex + \begin{beamercolorbox}[colsep=0.025ex]{block separator}\end{beamercolorbox} + \end{beamercolorbox} + {\parskip0pt\par} + \usebeamerfont{block body} + \vskip-0.5ex + \begin{beamercolorbox}[colsep*=0ex]{block body} + \justifying + \setlength{\parskip}{1ex} + \vskip-2ex +} +\setbeamertemplate{block end} +{ + \end{beamercolorbox} + \vskip0pt + \vspace*{2ex} +} + +% Alert Block +\setbeamertemplate{block alerted begin} +{ + \begin{beamercolorbox}[colsep*=0ex,dp=2ex,center]{block alerted title} + \vskip0pt + \usebeamerfont{block title}\insertblocktitle + \vskip-1.25ex + \begin{beamercolorbox}[colsep=0.025ex]{block alerted separator}\end{beamercolorbox} + \end{beamercolorbox} + {\parskip0pt\par} + \usebeamerfont{block body} + \vskip-0.5ex + \begin{beamercolorbox}[colsep*=0ex]{block alerted body} + \justifying + \begin{adjustwidth}{1ex}{1ex} + \setlength{\parskip}{1ex} + \vskip-2ex +} +\setbeamertemplate{block alerted end} +{ + \end{adjustwidth} + \vskip1ex + \end{beamercolorbox} + \vskip0pt + \vspace*{2ex} +} + +% Example Block +\setbeamertemplate{block example begin} +{ + \begin{beamercolorbox}[colsep*=0ex,dp=2ex,center]{block example title} + \vskip0pt + \usebeamerfont{block title}\insertblocktitle + \vskip-1.25ex + \begin{beamercolorbox}[colsep=0.025ex]{block example separator}\end{beamercolorbox} + \end{beamercolorbox} + {\parskip0pt\par} + \usebeamerfont{block body} + \vskip-0.5ex + \begin{beamercolorbox}[colsep*=0ex]{block example body} + \justifying + \begin{adjustwidth}{1ex}{1ex} + \setlength{\parskip}{1ex} + \vskip-2ex +} +\setbeamertemplate{block example end} +{ + \end{adjustwidth} + \vskip1ex + \end{beamercolorbox} + \vskip0pt + \vspace*{2ex} +} + +% Footer +\newcommand{\footercontent}[1]{\newcommand{\insertfootercontent}{#1}} + +\setbeamertemplate{footline}{ + \ifdefined\insertfootercontent + \begin{beamercolorbox}[vmode]{headline} + \ifbeamercolorempty[bg]{headline rule}{}{ + \begin{beamercolorbox}[wd=\paperwidth,colsep=0.25ex]{headline rule}\end{beamercolorbox} + } + \vspace{1.5ex} + \hspace{\sepwidth} + \usebeamerfont{footline} + \centering + \insertfootercontent + \hspace{\sepwidth} + \vspace{1.5ex} + \end{beamercolorbox} + \else\fi +} diff --git a/manuscript/2024_12_attribution_broad_retreat/cv-style.sty 
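The style file above exposes a small poster-facing API: \logoleft, \logoright, \footercontent, \heading and \samelineand, plus the usual beamer block environments. As a hedged illustration only (nothing below is part of this commit), a minimal beamerposter skeleton wiring those commands together might look like the following; the class options, logo file names, titles, and the matching gemini color theme are assumptions taken from the upstream gemini example rather than from this diff, and the footline template's reference to \sepwidth means the document itself has to define that length. Compile with LuaLaTeX or XeLaTeX, since the theme loads fonts through fontspec.

% Sketch only -- not part of the diff. Placeholder text and file names throughout.
\documentclass[final]{beamer}
\usepackage[size=custom,width=120,height=72,scale=1.2]{beamerposter}
\usetheme{gemini}
\usecolortheme{gemini}   % assumed companion color theme from the upstream gemini repo

% The footline template uses \sepwidth, so the document must provide it
% (as the upstream gemini example poster does).
\newlength{\sepwidth}
\setlength{\sepwidth}{0.03\paperwidth}

\title{Placeholder poster title}
\author{First Author \samelineand Second Author}
\institute{Placeholder institute}

\logoleft{\includegraphics[height=6cm]{logo-left.pdf}}    % placeholder image
\logoright{\includegraphics[height=6cm]{logo-right.pdf}}  % placeholder image
\footercontent{Placeholder retreat 2024 \quad someone@example.org}

\begin{document}
\begin{frame}[t]
  \begin{columns}[t]
    \begin{column}{0.45\paperwidth}
      \begin{block}{A block title}
        \heading{A heading inside the block}
        Body text set in Lato.
      \end{block}
      \begin{alertblock}{An alert block}
        More placeholder text.
      \end{alertblock}
    \end{column}
  \end{columns}
\end{frame}
\end{document}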
b/manuscript/2024_12_attribution_broad_retreat/cv-style.sty
new file mode 100644
index 0000000..e2368fe
--- /dev/null
+++ b/manuscript/2024_12_attribution_broad_retreat/cv-style.sty
@@ -0,0 +1,13 @@
+\definecolor{red}{RGB}{221,42,43}
+\definecolor{green}{RGB}{105,182,40}
+\definecolor{blue}{RGB}{0,51,153}
+\definecolor{gray}{RGB}{25,25,25}
+%\definecolor{red}{HTML}{D43F3F}
+%\definecolor{blue}{HTML}{00ACE9}
+%\definecolor{green}{HTML}{6A9A1F}
+\colorlet{theMainColor}{blue}
+\colorlet{theRefColor}{blue!90}
+\newcommand{\globalcolor}[1]{%
+
+  \color{#1}\global\let\default@color\current@color
+}
diff --git a/manuscript/2024_12_attribution_broad_retreat/flake.nix b/manuscript/2024_12_attribution_broad_retreat/flake.nix
index e96889a..c190f28 100644
--- a/manuscript/2024_12_attribution_broad_retreat/flake.nix
+++ b/manuscript/2024_12_attribution_broad_retreat/flake.nix
@@ -57,7 +57,7 @@
             imagemagick
           ];
           enterShell = ''
-            export FONTCONFIG_FILE=$(nix-build -E 'let pkgs = import <nixpkgs> { }; in pkgs.makeFontsConf { fontDirectories = [ pkgs.freefont_ttf ]; }')
+            export FONTCONFIG_FILE=$(nix-build -E 'let pkgs = import <nixpkgs> { }; in pkgs.makeFontsConf { fontDirectories = [ pkgs.freefont_ttf pkgs.raleway pkgs.lato]; }')
           '';
         }
diff --git a/manuscript/2024_12_attribution_broad_retreat/local-bib.bib b/manuscript/2024_12_attribution_broad_retreat/local-bib.bib
new file mode 100644
index 0000000..18a0bfc
--- /dev/null
+++ b/manuscript/2024_12_attribution_broad_retreat/local-bib.bib
@@ -0,0 +1,2355 @@
+@misc{2024_11_30_marimo_explorerpy,
+  title = {2024\_11\_30\_marimo\_explorer.Py},
+  urldate = {2024-12-05},
+  howpublished = {http://localhost:2721/},
+  file = {/Users/hhakem/Zotero/storage/78PYG9D7/localhost.html}
+}
+
+@misc{AbcAbstractBase,
+  title = {Abc --- {{Abstract Base Classes}}},
+  journal = {Python documentation},
+  urldate = {2024-07-25},
+  abstract = {Source code: Lib/abc.py This module provides the infrastructure for defining abstract base classes(ABCs) in Python, as outlined in PEP 3119; see the PEP for why this was added to Python. (See also ...},
+  howpublished = {https://docs.python.org/3/library/abc.html},
+  langid = {english},
+  file = {/Users/hhakem/Zotero/storage/L6EURHSS/abc.html}
+}
+
+@misc{ahlmann-eltzeDeepLearningbasedPredictions2024,
+  title = {Deep Learning-Based Predictions of Gene Perturbation Effects Do Not yet Outperform Simple Linear Methods},
+  author = {{Ahlmann-Eltze}, Constantin and Huber, Wolfgang and Anders, Simon},
+  year = {2024},
+  month = oct,
+  primaryclass = {New Results},
+  pages = {2024.09.16.613342},
+  publisher = {bioRxiv},
+  doi = {10.1101/2024.09.16.613342},
+  urldate = {2024-11-04},
+  abstract = {Advanced deep-learning methods, such as transformer-based foundation models, promise to learn representations of biology that can be employed to predict in silico the outcome of unseen experiments, such as the effect of genetic perturbations on the transcriptomes of human cells. To see whether current models already reach this goal, we benchmarked two state-of-the-art foundation models and one popular graph-based deep learning framework against deliberately simplistic linear models in two important use cases: For combinatorial perturbations of two genes for which only data for the individual single perturbations have been seen, we find that a simple additive model outperformed the deep learning-based approaches.
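Stepping back to the flake.nix hunk above: it widens the generated FONTCONFIG_FILE so that fontspec can find Raleway and Lato inside the devenv shell, which is what the \newfontfamily and \setsansfont lines in the theme rely on. The stand-alone check below is a sketch only (the document and its text are placeholders, not part of the commit), assuming the shell is active; compiled with LuaLaTeX or XeLaTeX, a "font not found" error would indicate the fontconfig export is not being picked up.

% Sketch only -- a stand-alone font check mirroring the theme's declarations.
\documentclass{article}
\usepackage{fontspec}
\newfontfamily\Raleway[Ligatures=TeX]{Raleway}
\setmainfont{Lato}[
  UprightFont=*-Light,
  ItalicFont=*-LightItalic,
  BoldFont=*-Regular,
  BoldItalicFont=*-Italic
]
\begin{document}
Body text in Lato Light. {\Raleway Headline text in Raleway.}
\textbf{Bold here maps to Lato Regular via the substitution above.}
\end{document}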
Also, for perturbations of genes that have not yet been seen, but which may be "interpolated" from biological similarity or network context, a simple linear model performed as good as the deep learning-based approaches. While the promise of deep neural networks for the representation of biological systems and prediction of experimental outcomes is plausible, our work highlights the need for critical benchmarking to direct research efforts that aim to bring transfer learning to biology.}, + archiveprefix = {bioRxiv}, + chapter = {New Results}, + copyright = {{\copyright} 2024, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution-NonCommercial 4.0 International), CC BY-NC 4.0, as described at http://creativecommons.org/licenses/by-nc/4.0/}, + langid = {english}, + file = {/Users/hhakem/Zotero/storage/BZTWPHQQ/Ahlmann-Eltze et al. - 2024 - Deep learning-based predictions of gene perturbati.pdf} +} + +@misc{akinremiBestToolsModel2022, + title = {Best {{Tools}} for {{Model Tuning}} and {{Hyperparameter Optimization}}}, + author = {Akinremi, Bunmi}, + year = {2022}, + month = jul, + journal = {neptune.ai}, + urldate = {2024-07-31}, + abstract = {I vividly remember a machine learning hackathon that I participated in two years ago, when I was at the beginning of my data science career. It was a pre-qualification hackathon for a bootcamp organised by Data Science Nigeria.~ The dataset had information about certain employees. I had to predict if an employee should get a{\dots}}, + howpublished = {https://neptune.ai/blog/best-tools-for-model-tuning-and-hyperparameter-optimization}, + langid = {american}, + file = {/Users/hhakem/Zotero/storage/4NPR5SUT/best-tools-for-model-tuning-and-hyperparameter-optimization.html} +} + +@inproceedings{alalufReStyleResidualBasedStyleGAN2021, + title = {{{ReStyle}}: {{A Residual-Based StyleGAN Encoder}} via {{Iterative Refinement}}}, + shorttitle = {{{ReStyle}}}, + booktitle = {2021 {{IEEE}}/{{CVF International Conference}} on {{Computer Vision}} ({{ICCV}})}, + author = {Alaluf, Yuval and Patashnik, Or and {Cohen-Or}, Daniel}, + year = {2021}, + month = oct, + pages = {6691--6700}, + issn = {2380-7504}, + doi = {10.1109/ICCV48922.2021.00664}, + urldate = {2024-07-10}, + abstract = {Recently, the power of unconditional image synthesis has significantly advanced through the use of Generative Adversarial Networks (GANs). The task of inverting an image into its corresponding latent code of the trained GAN is of utmost importance as it allows for the manipulation of real images, leveraging the rich semantics learned by the network. Recognizing the limitations of current inversion approaches, in this work we present a novel inversion scheme that extends current encoder-based inversion methods by introducing an iterative refinement mechanism. Instead of directly predicting the latent code of a given real image using a single pass, the encoder is tasked with predicting a residual with respect to the current estimate of the inverted latent code in a self-correcting manner. Our residual-based encoder, named ReStyle, attains improved accuracy compared to current state-of-the-art encoder-based methods with a negligible increase in inference time. We analyze the behavior of ReStyle to gain valuable insights into its iterative nature. We then evaluate the performance of our residual encoder and analyze its robustness compared to optimization-based inversion and state-of-the-art encoders. 
Code is available via our project page: https: //yuval-alaluf.github.io/restyle-encoder/}, + keywords = {Codes,Generative adversarial networks,Image and video synthesis,Image synthesis,Iterative methods,Neural generative models,Representation learning,Robustness,Semantics,Training,UnRead,Vision applications and systems}, + file = {/Users/hhakem/Zotero/storage/3Y8U2BUQ/Alaluf et al. - 2021 - ReStyle A Residual-Based StyleGAN Encoder via Ite.pdf;/Users/hhakem/Zotero/storage/TWYXBJKQ/citations.html} +} + +@article{albouyConstrainedOptimizationShadow, + title = {Constrained {{Optimization}}, {{Shadow Prices}}, {{Inefficient Markets}}, and {{Government Projects}}}, + author = {Albouy, David}, + langid = {english} +} + +@misc{almahairiAugmentedCycleGANLearning2018, + title = {Augmented {{CycleGAN}}: {{Learning Many-to-Many Mappings}} from {{Unpaired Data}}}, + shorttitle = {Augmented {{CycleGAN}}}, + author = {Almahairi, Amjad and Rajeswar, Sai and Sordoni, Alessandro and Bachman, Philip and Courville, Aaron}, + year = {2018}, + month = jun, + number = {arXiv:1802.10151}, + eprint = {1802.10151}, + primaryclass = {cs}, + publisher = {arXiv}, + urldate = {2024-07-09}, + abstract = {Learning inter-domain mappings from unpaired data can improve performance in structured prediction tasks, such as image segmentation, by reducing the need for paired data. CycleGAN was recently proposed for this problem, but critically assumes the underlying inter-domain mapping is approximately deterministic and one-to-one. This assumption renders the model ineffective for tasks requiring flexible, many-to-many mappings. We propose a new model, called Augmented CycleGAN, which learns many-to-many mappings between domains. We examine Augmented CycleGAN qualitatively and quantitatively on several image datasets.}, + archiveprefix = {arXiv}, + langid = {english}, + keywords = {Computer Science - Machine Learning,UnRead}, + file = {/Users/hhakem/Zotero/storage/PVPL8HVK/Almahairi et al. - 2018 - Augmented CycleGAN Learning Many-to-Many Mappings.pdf} +} + +@misc{anconaBetterUnderstandingGradientbased2018, + title = {Towards Better Understanding of Gradient-Based Attribution Methods for {{Deep Neural Networks}}}, + author = {Ancona, Marco and Ceolini, Enea and {\"O}ztireli, Cengiz and Gross, Markus}, + year = {2018}, + month = mar, + number = {arXiv:1711.06104}, + eprint = {1711.06104}, + primaryclass = {cs, stat}, + publisher = {arXiv}, + urldate = {2024-07-03}, + abstract = {Understanding the flow of information in Deep Neural Networks (DNNs) is a challenging problem that has gain increasing attention over the last few years. While several methods have been proposed to explain network predictions, there have been only a few attempts to compare them from a theoretical perspective. What is more, no exhaustive empirical comparison has been performed in the past. In this work, we analyze four gradient-based attribution methods and formally prove conditions of equivalence and approximation between them. By reformulating two of these methods, we construct a unified framework which enables a direct comparison, as well as an easier implementation. 
Finally, we propose a novel evaluation metric, called Sensitivity-n and test the gradient-based attribution methods alongside with a simple perturbation-based attribution method on several datasets in the domains of image and text classification, using various network architectures.}, + archiveprefix = {arXiv}, + langid = {english}, + keywords = {Computer Science - Machine Learning,Read,Statistics - Machine Learning}, + file = {/Users/hhakem/Zotero/storage/K8VE4N5M/Ancona et al. - 2018 - Towards better understanding of gradient-based att.pdf} +} + +@article{andersHTSeqPythonFramework2015, + title = {{{HTSeq}}---a {{Python}} Framework to Work with High-Throughput Sequencing Data}, + author = {Anders, Simon and Pyl, Paul Theodor and Huber, Wolfgang}, + year = {2015}, + month = jan, + journal = {Bioinformatics}, + volume = {31}, + number = {2}, + pages = {166--169}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btu638}, + urldate = {2024-11-04}, + abstract = {Motivation: A large choice of tools exists for many standard tasks in the analysis of high-throughput sequencing (HTS) data. However, once a project deviates from standard workflows, custom scripts are needed. Results: We present HTSeq, a Python library to facilitate the rapid development of such scripts. HTSeq offers parsers for many common data formats in HTS projects, as well as classes to represent data, such as genomic coordinates, sequences, sequencing reads, alignments, gene model information and variant calls, and provides data structures that allow for querying via genomic coordinates. We also present htseq-count, a tool developed with HTSeq that preprocesses RNA-Seq data for differential expression analysis by counting the overlap of reads with genes. Availability and implementation: HTSeq is released as an open-source software under the GNU General Public Licence and available from http://www-huber.embl.de/HTSeq or from the Python Package Index at https://pypi.python.org/pypi/HTSeq . Contact: ~sanders@fs.tum.de}, + file = {/Users/hhakem/Zotero/storage/9RH7SM33/Anders et al. - 2015 - HTSeq—a Python framework to work with high-through.pdf;/Users/hhakem/Zotero/storage/7V28NDBT/2366196.html} +} + +@article{araujoComputingReceptiveFields2019, + title = {Computing {{Receptive Fields}} of {{Convolutional Neural Networks}}}, + author = {Araujo, Andr\é and Norris, Wade and Sim, Jack}, + year = {2019}, + month = nov, + journal = {Distill}, + volume = {4}, + number = {11}, + pages = {e21}, + issn = {2476-0757}, + doi = {10.23915/distill.00021}, + urldate = {2024-08-20}, + abstract = {Detailed derivations and open-source code to analyze the receptive fields of convnets.}, + langid = {english}, + file = {/Users/hhakem/Zotero/storage/J93A8PPN/computing-receptive-fields.html} +} + +@article{arevaloEvaluatingBatchCorrection2024, + title = {Evaluating Batch Correction Methods for Image-Based Cell Profiling}, + author = {Arevalo, John and Su, Ellen and {van Dijk}, Robert and Carpenter, Anne E. and Singh, Shantanu}, + year = {2024}, + month = feb, + journal = {bioRxiv}, + pages = {2023.09.15.558001}, + doi = {10.1101/2023.09.15.558001}, + urldate = {2024-07-25}, + abstract = {High-throughput image-based profiling platforms are powerful technologies capable of collecting data from billions of cells exposed to thousands of perturbations in a time- and cost-effective manner. Therefore, image-based profiling data has been increasingly used for diverse biological applications, such as predicting drug mechanism of action or gene function. 
However, batch effects pose severe limitations to community-wide efforts to integrate and interpret image-based profiling data collected across different laboratories and equipment. To address this problem, we benchmarked seven high-performing scRNA-seq batch correction techniques, representing diverse approaches, using a newly released Cell Painting dataset, the largest publicly accessible image-based dataset. We focused on five different scenarios with varying complexity, and we found that Harmony, a mixture-model based method, consistently outperformed the other tested methods. Our proposed framework, benchmark, and metrics can additionally be used to assess new batch correction methods in the future. Overall, this work paves the way for improvements that allow the community to make best use of public Cell Painting data for scientific discovery.}, + pmcid = {PMC10516049}, + pmid = {37745478}, + file = {/Users/hhakem/Zotero/storage/XUUUKFE7/Arevalo et al. - 2024 - Evaluating batch correction methods for image-base.pdf} +} + +@misc{augustinDiffusionVisualCounterfactual2022, + title = {Diffusion {{Visual Counterfactual Explanations}}}, + author = {Augustin, Maximilian and Boreiko, Valentyn and Croce, Francesco and Hein, Matthias}, + year = {2022}, + month = oct, + number = {arXiv:2210.11841}, + eprint = {2210.11841}, + primaryclass = {cs}, + publisher = {arXiv}, + urldate = {2024-10-10}, + abstract = {Visual Counterfactual Explanations (VCEs) are an important tool to understand the decisions of an image classifier. They are ``small'' but ``realistic'' semantic changes of the image changing the classifier decision. Current approaches for the generation of VCEs are restricted to adversarially robust models and often contain non-realistic artefacts, or are limited to image classification problems with few classes. In this paper, we overcome this by generating Diffusion Visual Counterfactual Explanations (DVCEs) for arbitrary ImageNet classifiers via a diffusion process. Two modifications to the diffusion process are key for our DVCEs: first, an adaptive parameterization, whose hyperparameters generalize across images and models, together with distance regularization and late start of the diffusion process, allow us to generate images with minimal semantic changes to the original ones but different classification. Second, our cone regularization via an adversarially robust model ensures that the diffusion process does not converge to trivial non-semantic changes, but instead produces realistic images of the target class which achieve high confidence by the classifier. Code is available under https://github.com/valentyn1boreiko/DVCEs.}, + archiveprefix = {arXiv}, + langid = {english}, + keywords = {Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning}, + file = {/Users/hhakem/Zotero/storage/K28D4X8F/Augustin et al. - 2022 - Diffusion Visual Counterfactual Explanations.pdf} +} + +@misc{augustinDiffusionVisualCounterfactual2022a, + title = {Diffusion {{Visual Counterfactual Explanations}}}, + author = {Augustin, Maximilian and Boreiko, Valentyn and Croce, Francesco and Hein, Matthias}, + year = {2022}, + month = oct, + number = {arXiv:2210.11841}, + eprint = {2210.11841}, + primaryclass = {cs}, + publisher = {arXiv}, + urldate = {2024-10-10}, + abstract = {Visual Counterfactual Explanations (VCEs) are an important tool to understand the decisions of an image classifier. 
They are ``small'' but ``realistic'' semantic changes of the image changing the classifier decision. Current approaches for the generation of VCEs are restricted to adversarially robust models and often contain non-realistic artefacts, or are limited to image classification problems with few classes. In this paper, we overcome this by generating Diffusion Visual Counterfactual Explanations (DVCEs) for arbitrary ImageNet classifiers via a diffusion process. Two modifications to the diffusion process are key for our DVCEs: first, an adaptive parameterization, whose hyperparameters generalize across images and models, together with distance regularization and late start of the diffusion process, allow us to generate images with minimal semantic changes to the original ones but different classification. Second, our cone regularization via an adversarially robust model ensures that the diffusion process does not converge to trivial non-semantic changes, but instead produces realistic images of the target class which achieve high confidence by the classifier. Code is available under https://github.com/valentyn1boreiko/DVCEs.}, + archiveprefix = {arXiv}, + langid = {english}, + keywords = {Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning}, + file = {/Users/hhakem/Zotero/storage/NPFZB574/Augustin et al. - 2022 - Diffusion Visual Counterfactual Explanations.pdf} +} + +@misc{baekCRADLEVAEEnhancingSingleCell2024, + title = {{{CRADLE-VAE}}: {{Enhancing Single-Cell Gene Perturbation Modeling}} with {{Counterfactual Reasoning-based Artifact Disentanglement}}}, + shorttitle = {{{CRADLE-VAE}}}, + author = {Baek, Seungheun and Park, Soyon and Chok, Yan Ting and Lee, Junhyun and Park, Jueon and Gim, Mogan and Kang, Jaewoo}, + year = {2024}, + month = sep, + number = {arXiv:2409.05484}, + eprint = {2409.05484}, + primaryclass = {cs, q-bio}, + publisher = {arXiv}, + urldate = {2024-09-12}, + abstract = {Predicting cellular responses to various perturbations is a critical focus in drug discovery and personalized therapeutics, with deep learning models playing a significant role in this endeavor. Single-cell datasets contain technical artifacts that may hinder the predictability of such models, which poses quality control issues highly regarded in this area. To address this, we propose CRADLE-VAE, a causal generative framework tailored for single-cell gene perturbation modeling, enhanced with counterfactual reasoning-based artifact disentanglement. Throughout training, CRADLE-VAE models the underlying latent distribution of technical artifacts and perturbation effects present in single-cell datasets. It employs counterfactual reasoning to effectively disentangle such artifacts by modulating the latent basal spaces and learns robust features for generating cellular response data with improved quality. Experimental results demonstrate that this approach improves not only treatment effect estimation performance but also generative quality as well. The CRADLE-VAE codebase is publicly available at https://github.com/dmis-lab/CRADLE-VAE.}, + archiveprefix = {arXiv}, + langid = {english}, + keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning,Quantitative Biology - Genomics,Quantitative Biology - Quantitative Methods}, + file = {/Users/hhakem/Zotero/storage/F9V9P5YE/Baek et al. 
- 2024 - CRADLE-VAE Enhancing Single-Cell Gene Perturbatio.pdf} +} + +@misc{BeginnersGuideFine2024, + title = {A Beginners Guide to Fine Tuning {{LLM}} Using {{LoRA}}}, + year = {2024}, + month = feb, + journal = {zabirauf {\textbar}{\textbar} Zohaib}, + urldate = {2024-08-06}, + abstract = {Discover how to create a synthetic dataset, select the right metrics for evaluation, and fine-tune your model using LoRA for a narrow scenario. Plus, learn how to serve your model efficiently using LLaMa.cpp on Mac/Linux.}, + howpublished = {https://zohaib.me/a-beginners-guide-to-fine-tuning-llm-using-lora/}, + langid = {english}, + file = {/Users/hhakem/Zotero/storage/Q22APS5I/a-beginners-guide-to-fine-tuning-llm-using-lora.html} +} + +@article{bendelGeneticArchitectureProtein2024, + title = {The Genetic Architecture of Protein Interaction Affinity and Specificity}, + author = {Bendel, Alexandra M. and Faure, Andre J. and Klein, Dominique and Shimada, Kenji and Lyautey, Romane and Schiffelholz, Nicole and Kempf, Georg and Cavadini, Simone and Lehner, Ben and Diss, Guillaume}, + year = {2024}, + month = oct, + journal = {Nature Communications}, + volume = {15}, + number = {1}, + pages = {8868}, + publisher = {Nature Publishing Group}, + issn = {2041-1723}, + doi = {10.1038/s41467-024-53195-4}, + urldate = {2024-11-17}, + abstract = {The encoding and evolution of specificity and affinity in protein-protein interactions is poorly understood. Here, we address this question by quantifying how all mutations in one protein, JUN, alter binding to all other members of a protein family, the 54 human basic leucine zipper transcription factors. We fit a global thermodynamic model to the data to reveal that most affinity changing mutations equally affect JUN's affinity to all its interaction partners. Mutations that alter binding specificity are relatively rare but distributed throughout the interaction interface. Specificity is determined both by features that promote on-target interactions and by those that prevent off-target interactions. Approximately half of the specificity-defining residues in JUN contribute both to promoting on-target binding and preventing off-target binding. Nearly all specificity-altering mutations in the interaction interface are pleiotropic, also altering affinity to all partners. In contrast, mutations outside the interface can tune global affinity without affecting specificity. Our results reveal the distributed encoding of specificity and affinity in an interaction interface and how coiled-coils provide an elegant solution to the challenge of optimizing both~specificity and affinity in a large protein family.}, + copyright = {2024 The Author(s)}, + langid = {english}, + keywords = {Molecular evolution,Protein-protein interaction networks,Proteins}, + file = {/Users/hhakem/Zotero/storage/R3MPKFC2/Bendel et al. - 2024 - The genetic architecture of protein interaction af.pdf} +} + +@article{bergstraRandomSearchHyperParameter, + title = {Random {{Search}} for {{Hyper-Parameter Optimization}}}, + author = {Bergstra, James and Bengio, Yoshua}, + abstract = {Grid search and manual search are the most widely used strategies for hyper-parameter optimization. This paper shows empirically and theoretically that randomly chosen trials are more efficient for hyper-parameter optimization than trials on a grid. Empirical evidence comes from a comparison with a large previous study that used grid search and manual search to configure neural networks and deep belief networks. 
Compared with neural networks configured by a pure grid search, we find that random search over the same domain is able to find models that are as good or better within a small fraction of the computation time. Granting random search the same computational budget, random search finds better models by effectively searching a larger, less promising configuration space. Compared with deep belief networks configured by a thoughtful combination of manual search and grid search, purely random search over the same 32-dimensional configuration space found statistically equal performance on four of seven data sets, and superior performance on one of seven. A Gaussian process analysis of the function from hyper-parameters to validation set performance reveals that for most data sets only a few of the hyper-parameters really matter, but that different hyper-parameters are important on different data sets. This phenomenon makes grid search a poor choice for configuring algorithms for new data sets. Our analysis casts some light on why recent ``High Throughput'' methods achieve surprising success---they appear to search through a large number of hyper-parameters because most hyper-parameters do not matter much. We anticipate that growing interest in large hierarchical models will place an increasing burden on techniques for hyper-parameter optimization; this work shows that random search is a natural baseline against which to judge progress in the development of adaptive (sequential) hyper-parameter optimization algorithms.}, + langid = {english}, + file = {/Users/hhakem/Zotero/storage/MXX8EYJD/Bergstra and Bengio - Random Search for Hyper-Parameter Optimization.pdf} +} + +@misc{Bienvenue, + title = {Bienvenue}, + urldate = {2024-07-16}, + howpublished = {https://id.elsevier.com/as/authorization.oauth2?platSite=SD\%2Fscience\&additionalPlatSites=GH\%2Fgeneralhospital\%2CSC\%2Fscopus\&scope=openid\%20email\%20profile\%20els\_auth\_info\%20els\_idp\_info\%20els\_idp\_analytics\_attrs\%20els\_sa\_discover\%20urn\%3Acom\%3Aelsevier\%3Aidp\%3Apolicy\%3Aproduct\%3Aindv\_identity\&response\_type=code\&redirect\_uri=https\%3A\%2F\%2Fwww.sciencedirect.com\%2Fuser\%2Fidentity\%2Flanding\&authType=SINGLE\_SIGN\_IN\&prompt=login\&client\_id=SDFE-v4\&state=retryCounter\%3D0\%26csrfToken\%3D89dc58ee-81e9-494e-9154-ba7b9593f6f0\%26idpPolicy\%3Durn\%253Acom\%253Aelsevier\%253Aidp\%253Apolicy\%253Aproduct\%253Aindv\_identity\%26returnUrl\%3D\%252Fscience\%252Farticle\%252Fpii\%252FS2667318524000059\%253Fvia\%25253Dihub\%26prompt\%3Dlogin\%26cid\%3Datp-d37dfe96-b61a-4c5f-bf22-86ad02510ce6\&els\_policy=idp\_policy\_indv\_identity\_plus}, + file = {/Users/hhakem/Zotero/storage/XKQ6BWIX/authorization.html} +} + +@misc{BindCraftOneshotDesign, + title = {{{BindCraft}}: One-Shot Design of Functional Protein Binders {\textbar} {{bioRxiv}}}, + urldate = {2024-11-05}, + howpublished = {https://www.biorxiv.org/content/10.1101/2024.09.30.615802v1}, + file = {/Users/hhakem/Zotero/storage/CNZ3B3SQ/2024.09.30.html} +} + +@article{bosharAreGenomicLanguage2024, + title = {Are Genomic Language Models All You Need? 
{{Exploring}} Genomic Language Models on Protein Downstream Tasks}, + shorttitle = {Are Genomic Language Models All You Need?}, + author = {Boshar, Sam and Trop, Evan and {de Almeida}, Bernardo P and Copoiu, Liviu and Pierrot, Thomas}, + year = {2024}, + month = sep, + journal = {Bioinformatics}, + volume = {40}, + number = {9}, + pages = {btae529}, + issn = {1367-4811}, + doi = {10.1093/bioinformatics/btae529}, + urldate = {2024-10-17}, + abstract = {Large language models, trained on enormous corpora of biological sequences, are state-of-the-art for downstream genomic and proteomic tasks. Since the genome contains the information to encode all proteins, genomic language models (gLMs) hold the potential to make downstream predictions not only about DNA sequences, but also about proteins. However, the performance of gLMs on protein tasks remains unknown, due to few tasks pairing proteins with the coding DNA sequences (CDS) that can be processed by gLMs.In this work, we curated five such datasets and used them to evaluate the performance of gLMs and proteomic language models (pLMs). We show that gLMs are competitive and even outperform their pLMs counterparts on some tasks. The best performance was achieved using the retrieved CDS compared to sampling strategies. We found that training a joint genomic-proteomic model outperforms each individual approach, showing that they capture different but complementary sequence representations, as we demonstrate through model interpretation of their embeddings. Lastly, we explored different genomic tokenization schemes to improve downstream protein performance. We trained a new Nucleotide Transformer (50M) foundation model with 3mer tokenization that outperforms its 6mer counterpart on protein tasks while maintaining performance on genomics tasks. The application of gLMs to proteomics offers the potential to leverage rich CDS data, and in the spirit of the central dogma, the possibility of a unified and synergistic approach to genomics and proteomics.We make our inference code, 3mer pre-trained model weights and datasets available.}, + file = {/Users/hhakem/Zotero/storage/2T6XEW6Y/Boshar et al. - 2024 - Are genomic language models all you need Explorin.pdf;/Users/hhakem/Zotero/storage/HDVSTFR4/7745814.html} +} + +@misc{bourouPhenDiffRevealingSubtle2024, + title = {{{PhenDiff}}: {{Revealing Subtle Phenotypes}} with {{Diffusion Models}} in {{Real Images}}}, + shorttitle = {{{PhenDiff}}}, + author = {Bourou, Anis and Boyer, Thomas and Daupin, K{\'e}vin and Dubreuil, V{\'e}ronique and De Thonel, Aur{\'e}lie and Mezger, Val{\'e}rie and Genovesio, Auguste}, + year = {2024}, + month = jul, + number = {arXiv:2312.08290}, + eprint = {2312.08290}, + primaryclass = {cs, eess, q-bio}, + publisher = {arXiv}, + urldate = {2024-08-13}, + abstract = {For the past few years, deep generative models have increasingly been used in biological research for a variety of tasks. Recently, they have proven to be valuable for uncovering subtle cell phenotypic differences that are not directly discernible to the human eye. However, current methods employed to achieve this goal mainly rely on Generative Adversarial Networks (GANs). While effective, GANs encompass issues such as training instability and mode collapse, and they do not accurately map images back to the model's latent space, which is necessary to synthesize, manipulate, and thus interpret outputs based on real images. 
In this work, we introduce PhenDiff: a multi-class conditional method leveraging Diffusion Models (DMs) designed to identify shifts in cellular phenotypes by translating a real image from one condition to another. We qualitatively and quantitatively validate this method on cases where the phenotypic changes are visible or invisible, such as in low concentrations of drug treatments. Overall, PhenDiff represents a valuable tool for identifying cellular variations in real microscopy images. We anticipate that it could facilitate the understanding of diseases and advance drug discovery through the identification of novel biomarkers.}, + archiveprefix = {arXiv}, + langid = {english}, + keywords = {Computer Science - Machine Learning,Electrical Engineering and Systems Science - Image and Video Processing,Quantitative Biology - Quantitative Methods}, + file = {/Users/hhakem/Zotero/storage/Q5CUWSRC/Bourou et al. - 2024 - PhenDiff Revealing Subtle Phenotypes with Diffusi.pdf} +} + +@misc{brownleeGentleIntroductionGenerative2019, + title = {A {{Gentle Introduction}} to {{Generative Adversarial Networks}} ({{GANs}})}, + author = {Brownlee, Jason}, + year = {2019}, + month = jun, + journal = {MachineLearningMastery.com}, + urldate = {2024-07-08}, + abstract = {Generative Adversarial Networks, or GANs for short, are an approach to generative modeling using deep learning methods, such as convolutional neural networks. Generative modeling is an unsupervised learning task in machine learning that involves automatically discovering and learning the regularities or patterns in input data in such a way that the model can be used [{\dots}]}, + langid = {american}, + keywords = {Read}, + file = {/Users/hhakem/Zotero/storage/PDP7VBZX/what-are-generative-adversarial-networks-gans.html} +} + +@misc{brownleeHowImplementFrechet2019, + title = {How to {{Implement}} the {{Frechet Inception Distance}} ({{FID}}) for {{Evaluating GANs}}}, + author = {Brownlee, Jason}, + year = {2019}, + month = aug, + journal = {MachineLearningMastery.com}, + urldate = {2024-07-08}, + abstract = {The Frechet Inception Distance score, or FID for short, is a metric that calculates the distance between feature vectors calculated for real and generated images. The score summarizes how similar the two groups are in terms of statistics on computer vision features of the raw images calculated using the inception v3 model used for image [{\dots}]}, + langid = {american}, + keywords = {Read}, + file = {/Users/hhakem/Zotero/storage/PX6KWTQT/how-to-implement-the-frechet-inception-distance-fid-from-scratch.html} +} + +@article{buttenschoenPoseBustersAIbasedDocking2024, + title = {{{PoseBusters}}: {{AI-based}} Docking Methods Fail to Generate Physically Valid Poses or Generalise to Novel Sequences}, + shorttitle = {{{PoseBusters}}}, + author = {Buttenschoen, Martin and M.~Morris, Garrett and M.~Deane, Charlotte}, + year = {2024}, + journal = {Chemical Science}, + volume = {15}, + number = {9}, + pages = {3130--3139}, + publisher = {Royal Society of Chemistry}, + doi = {10.1039/D3SC04185A}, + urldate = {2024-11-05}, + langid = {english}, + file = {/Users/hhakem/Zotero/storage/GGCRAQPC/Buttenschoen et al. 
- 2024 - PoseBusters AI-based docking methods fail to gene.pdf} +} + +@misc{casalegnoTestingCompleteGuide2021, + title = {A/{{B Testing}} --- {{A}} Complete Guide to Statistical Testing}, + author = {Casalegno, Francesco}, + year = {2021}, + month = feb, + journal = {Medium}, + urldate = {2024-07-08}, + abstract = {Optimizing web marketing strategies through statistical testing}, + howpublished = {https://towardsdatascience.com/a-b-testing-a-complete-guide-to-statistical-testing-e3f1db140499}, + langid = {english}, + keywords = {Read}, + file = {/Users/hhakem/Zotero/storage/FFH48XKL/a-b-testing-a-complete-guide-to-statistical-testing-e3f1db140499.html} +} + +@article{chandrasekaranImagebasedProfilingDrug2021, + title = {Image-Based Profiling for Drug Discovery: Due for a Machine-Learning Upgrade?}, + shorttitle = {Image-Based Profiling for Drug Discovery}, + author = {Chandrasekaran, Srinivas Niranj and Ceulemans, Hugo and Boyd, Justin D. and Carpenter, Anne E.}, + year = {2021}, + month = feb, + journal = {Nature Reviews Drug Discovery}, + volume = {20}, + number = {2}, + pages = {145--159}, + publisher = {Nature Publishing Group}, + issn = {1474-1784}, + doi = {10.1038/s41573-020-00117-w}, + urldate = {2024-07-03}, + abstract = {Image-based profiling is a maturing strategy by which the rich information present in biological images is reduced to a multidimensional profile, a collection of extracted image-based features. These profiles can be mined for relevant patterns, revealing unexpected biological activity that is useful for many steps in the drug discovery process. Such applications include identifying disease-associated screenable phenotypes, understanding disease mechanisms and predicting a drug's activity, toxicity or mechanism of action. Several of these applications have been recently validated and have moved into production mode within academia and the pharmaceutical industry. Some of these have yielded disappointing results in practice but are now of renewed interest due to improved machine-learning strategies that better leverage image-based information. Although challenges remain, novel computational technologies such as deep learning and single-cell methods that better capture the biological information in images hold promise for accelerating drug discovery.}, + copyright = {2020 Springer Nature Limited}, + langid = {english}, + keywords = {Computational biology and bioinformatics,Phenotypic screening,Read}, + file = {/Users/hhakem/Zotero/storage/GCQR3CRQ/Chandrasekaran et al. - 2021 - Image-based profiling for drug discovery due for .pdf} +} + +@misc{chandrasekaranJUMPCellPainting2023, + title = {{{JUMP Cell Painting}} Dataset: Morphological Impact of 136,000 Chemical and Genetic Perturbations}, + shorttitle = {{{JUMP Cell Painting}} Dataset}, + author = {Chandrasekaran, Srinivas Niranj and Ackerman, Jeanelle and Alix, Eric and Ando, D. Michael and Arevalo, John and Bennion, Melissa and Boisseau, Nicolas and Borowa, Adriana and Boyd, Justin D. and Brino, Laurent and Byrne, Patrick J. and Ceulemans, Hugo and Ch'ng, Carolyn and Cimini, Beth A. and Clevert, Djork-Arne and Deflaux, Nicole and Doench, John G. and Dorval, Thierry and Doyonnas, Regis and Dragone, Vincenza and Engkvist, Ola and Faloon, Patrick W. and Fritchman, Briana and Fuchs, Florian and Garg, Sakshi and Gilbert, Tamara J. 
and Glazer, David and Gnutt, David and Goodale, Amy and Grignard, Jeremy and Guenther, Judith and Han, Yu and Hanifehlou, Zahra and Hariharan, Santosh and Hernandez, Desiree and Horman, Shane R. and Hormel, Gisela and Huntley, Michael and Icke, Ilknur and Iida, Makiyo and Jacob, Christina B. and Jaensch, Steffen and Khetan, Jawahar and {Kost-Alimova}, Maria and Krawiec, Tomasz and Kuhn, Daniel and Lardeau, Charles-Hugues and Lembke, Amanda and Lin, Francis and Little, Kevin D. and Lofstrom, Kenneth R. and Lotfi, Sofia and Logan, David J. and Luo, Yi and Madoux, Franck and Zapata, Paula A. Marin and Marion, Brittany A. and Martin, Glynn and McCarthy, Nicola Jane and Mervin, Lewis and Miller, Lisa and Mohamed, Haseeb and Monteverde, Tiziana and Mouchet, Elizabeth and Nicke, Barbara and Ogier, Arnaud and Ong, Anne-Laure and Osterland, Marc and Otrocka, Magdalena and Peeters, Pieter J. and Pilling, James and Prechtl, Stefan and Qian, Chen and Rataj, Krzysztof and Root, David E. and Sakata, Sylvie K. and Scrace, Simon and Shimizu, Hajime and Simon, David and Sommer, Peter and Spruiell, Craig and Sumia, Iffat and Swalley, Susanne E. and Terauchi, Hiroki and Thibaudeau, Amandine and Unruh, Amy and de Waeter, Jelle Van and Dyck, Michiel Van and van Staden, Carlo and Warcho{\l}, Micha{\l} and Weisbart, Erin and Weiss, Am{\'e}lie and {Wiest-Daessle}, Nicolas and Williams, Guy and Yu, Shan and Zapiec, Bolek and {\.Z}y{\l}a, Marek and Singh, Shantanu and Carpenter, Anne E.}, + year = {2023}, + month = mar, + primaryclass = {New Results}, + pages = {2023.03.23.534023}, + publisher = {bioRxiv}, + doi = {10.1101/2023.03.23.534023}, + urldate = {2024-07-03}, + abstract = {Image-based profiling has emerged as a powerful technology for various steps in basic biological and pharmaceutical discovery, but the community has lacked a large, public reference set of data from chemical and genetic perturbations. Here we present data generated by the Joint Undertaking for Morphological Profiling (JUMP)-Cell Painting Consortium, a collaboration between 10 pharmaceutical companies, six supporting technology companies, and two non-profit partners. When completed, the dataset will contain images and profiles from the Cell Painting assay for over 116,750 unique compounds, over-expression of 12,602 genes, and knockout of 7,975 genes using CRISPR-Cas9, all in human osteosarcoma cells (U2OS). The dataset is estimated to be 115 TB in size and capturing 1.6 billion cells and their single-cell profiles. File quality control and upload is underway and will be completed over the coming months at the Cell Painting Gallery: https://registry.opendata.aws/cellpainting-gallery. A portal to visualize a subset of the data is available at https://phenaid.ardigen.com/jumpcpexplorer/.}, + archiveprefix = {bioRxiv}, + chapter = {New Results}, + copyright = {{\copyright} 2023, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution 4.0 International), CC BY 4.0, as described at http://creativecommons.org/licenses/by/4.0/}, + langid = {english}, + keywords = {Read}, + file = {/Users/hhakem/Zotero/storage/99CUIW4H/Chandrasekaran et al. 
- 2023 - JUMP Cell Painting dataset morphological impact o.pdf} +} + +@misc{Cheminformatics_Course_2024Docx, + title = {Cheminformatics\_{{Course}}\_2024.Docx}, + journal = {Google Docs}, + urldate = {2024-07-16}, + abstract = {AI in Discovery Toxicology Workshop: Hands-on modeling of safety data July 2024 Machine learning (ML) and artificial intelligence (AI) models are gaining popularity across the drug discovery landscape. In this hands-on workshop, we will explain the construction and use of machine learning models...}, + howpublished = {https://docs.google.com/document/d/188kRzca5E-RQsHhJhkdrYElRhaWBl4fg/edit?usp=sharing\&ouid=111077171382563588729\&rtpof=true\&sd=true\&urp=gmail\_link\&usp=embed\_facebook}, + langid = {english}, + file = {/Users/hhakem/Zotero/storage/B2QVCDE4/edit.html} +} + +@article{chenApplyingInterpretableMachine2024, + title = {Applying Interpretable Machine Learning in Computational Biology---Pitfalls, Recommendations and Opportunities for New Developments}, + author = {Chen, Valerie and Yang, Muyu and Cui, Wenbo and Kim, Joon Sik and Talwalkar, Ameet and Ma, Jian}, + year = {2024}, + month = aug, + journal = {Nature Methods}, + volume = {21}, + number = {8}, + pages = {1454--1461}, + publisher = {Nature Publishing Group}, + issn = {1548-7105}, + doi = {10.1038/s41592-024-02359-7}, + urldate = {2024-09-16}, + abstract = {Recent advances in machine learning have enabled the development of next-generation predictive models for complex computational biology problems, thereby spurring the use of interpretable machine learning (IML) to unveil biological insights. However, guidelines for using IML in computational biology are generally underdeveloped. We provide an overview of IML methods and evaluation techniques and discuss common pitfalls encountered when applying IML methods to computational biology problems. We also highlight open questions, especially in the era of large language models, and call for collaboration between IML and computational biology researchers.}, + copyright = {2024 Springer Nature America, Inc.}, + langid = {english}, + keywords = {Computational models,Genomics,Machine learning,Software}, + file = {/Users/hhakem/Zotero/storage/QHTV9K7E/Chen et al. - 2024 - Applying interpretable machine learning in computa.pdf} +} + +@misc{choiStarGANUnifiedGenerative2018, + title = {{{StarGAN}}: {{Unified Generative Adversarial Networks}} for {{Multi-Domain Image-to-Image Translation}}}, + shorttitle = {{{StarGAN}}}, + author = {Choi, Yunjey and Choi, Minje and Kim, Munyoung and Ha, Jung-Woo and Kim, Sunghun and Choo, Jaegul}, + year = {2018}, + month = sep, + number = {arXiv:1711.09020}, + eprint = {1711.09020}, + primaryclass = {cs}, + publisher = {arXiv}, + doi = {10.48550/arXiv.1711.09020}, + urldate = {2024-09-02}, + abstract = {Recent studies have shown remarkable success in image-to-image translation for two domains. However, existing approaches have limited scalability and robustness in handling more than two domains, since different models should be built independently for every pair of image domains. To address this limitation, we propose StarGAN, a novel and scalable approach that can perform image-to-image translations for multiple domains using only a single model. Such a unified model architecture of StarGAN allows simultaneous training of multiple datasets with different domains within a single network. 
This leads to StarGAN's superior quality of translated images compared to existing models as well as the novel capability of flexibly translating an input image to any desired target domain. We empirically demonstrate the effectiveness of our approach on a facial attribute transfer and a facial expression synthesis tasks.}, + archiveprefix = {arXiv}, + keywords = {Computer Science - Computer Vision and Pattern Recognition}, + file = {/Users/hhakem/Zotero/storage/QKXK8CLZ/Choi et al. - 2018 - StarGAN Unified Generative Adversarial Networks f.pdf;/Users/hhakem/Zotero/storage/S2CMHRJE/1711.html} +} + +@misc{choiStarGANV2Diverse2020, + title = {{{StarGAN}} v2: {{Diverse Image Synthesis}} for {{Multiple Domains}}}, + shorttitle = {{{StarGAN}} V2}, + author = {Choi, Yunjey and Uh, Youngjung and Yoo, Jaejun and Ha, Jung-Woo}, + year = {2020}, + month = apr, + number = {arXiv:1912.01865}, + eprint = {1912.01865}, + primaryclass = {cs}, + publisher = {arXiv}, + urldate = {2024-09-04}, + abstract = {A good image-to-image translation model should learn a mapping between different visual domains while satisfying the following properties: 1) diversity of generated images and 2) scalability over multiple domains. Existing methods address either of the issues, having limited diversity or multiple models for all domains. We propose StarGAN v2, a single framework that tackles both and shows significantly improved results over the baselines. Experiments on CelebA-HQ and a new animal faces dataset (AFHQ) validate our superiority in terms of visual quality, diversity, and scalability. To better assess image-to-image translation models, we release AFHQ, high-quality animal faces with large inter- and intra-domain differences. The code, pretrained models, and dataset can be found at https://github.com/clovaai/stargan-v2.}, + archiveprefix = {arXiv}, + langid = {english}, + keywords = {Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning}, + file = {/Users/hhakem/Zotero/storage/3P8JLEDZ/Choi et al. - 2020 - StarGAN v2 Diverse Image Synthesis for Multiple D.pdf} +} + +@misc{CMU15445645, + title = {{{CMU}} 15-445/645 :: {{Intro}} to {{Database Systems}} ({{Fall}} 2024)}, + shorttitle = {{{CMU}} 15-445/645}, + journal = {CMU 15-445/645}, + urldate = {2024-09-03}, + abstract = {Yes, this is the premier course at Carnegie Mellon University on the design and implementation of database management systems. Topics include data models (relational, document, key/value), storage models (n-ary, decomposition), query languages (SQL, stored procedures), storage architectures (heaps, log-structured), indexes + filters (order preserving trees, hash tables, vector indexes), transaction processing (ACID, concurrency control), recovery (logging, checkpoints), query processing (joins, sorting, aggregation, optimization), and parallel architectures (multi-core, distributed). Case studies on open-source and commercial database systems are used to illustrate these techniques and trade-offs. 
The course is appropriate for students that are prepared to sweat their systems programming skills.},
+  howpublished = {https://15445.courses.cs.cmu.edu/fall2024},
+  langid = {english},
+  file = {/Users/hhakem/Zotero/storage/RA5RQN39/fall2024.html}
+}
+
+@article{cunninghamSPARSEAUTOENCODERSFIND2024,
+  title = {{{Sparse Autoencoders Find Highly Interpretable Features in Language Models}}},
+  author = {Cunningham, Hoagy and Ewart, Aidan and Riggs, Logan and Huben, Robert and Sharkey, Lee},
+  year = {2024},
+  abstract = {One of the roadblocks to a better understanding of neural networks' internals is polysemanticity, where neurons appear to activate in multiple, semantically distinct contexts. Polysemanticity prevents us from identifying concise, human-understandable explanations for what neural networks are doing internally. One hypothesised cause of polysemanticity is superposition, where neural networks represent more features than they have neurons by assigning features to an overcomplete set of directions in activation space, rather than to individual neurons. Here, we attempt to identify those directions, using sparse autoencoders to reconstruct the internal activations of a language model. These autoencoders learn sets of sparsely activating features that are more interpretable and monosemantic than directions identified by alternative approaches, where interpretability is measured by automated methods. Moreover, we show that with our learned set of features, we can pinpoint the features that are causally responsible for counterfactual behaviour on the indirect object identification task (Wang et al., 2022) to a finer degree than previous decompositions. This work indicates that it is possible to resolve superposition in language models using a scalable, unsupervised method. Our method may serve as a foundation for future mechanistic interpretability work, which we hope will enable greater model transparency and steerability.},
+  langid = {english},
+  file = {/Users/hhakem/Zotero/storage/NMRAPCK6/Cunningham et al. - 2024 - SPARSE AUTOENCODERS FIND HIGHLY INTER- PRETABLE FE.pdf}
+}
+
+@misc{dalla-torreNucleotideTransformerBuilding2024,
+  title = {The {{Nucleotide Transformer}}: {{Building}} and {{Evaluating Robust Foundation Models}} for {{Human Genomics}}},
+  shorttitle = {The {{Nucleotide Transformer}}},
+  author = {{Dalla-Torre}, Hugo and Gonzalez, Liam and {Mendoza-Revilla}, Javier and Carranza, Nicolas Lopez and Grzywaczewski, Adam Henryk and Oteri, Francesco and Dallago, Christian and Trop, Evan and de Almeida, Bernardo P. and Sirelkhatim, Hassan and Richard, Guillaume and Skwark, Marcin and Beguir, Karim and Lopez, Marie and Pierrot, Thomas},
+  year = {2024},
+  month = oct,
+  primaryclass = {New Results},
+  pages = {2023.01.11.523679},
+  publisher = {bioRxiv},
+  doi = {10.1101/2023.01.11.523679},
+  urldate = {2024-10-17},
+  abstract = {Closing the gap between measurable genetic information and observable traits is a longstanding challenge in genomics.
Yet, the prediction of molecular phenotypes from DNA sequences alone remains limited and inaccurate, often driven by the scarcity of annotated data and the inability to transfer learning between prediction tasks. Here, we present an extensive study of foundation models pre-trained on DNA sequences, named the Nucleotide Transformer, ranging from 50M up to 2.5B parameters and integrating information from 3,202 diverse human genomes, as well as 850 genomes selected across diverse phyla, including both model and non-model organisms. These transformer models yield transferable, context-specific representations of nucleotide sequences, which allow for accurate molecular phenotype prediction even in low-data settings. We show that the developed models can be fine-tuned at low cost and despite low available data regime to solve a variety of genomics applications. Despite no supervision, the transformer models learned to focus attention on key genomic elements, including those that regulate gene expression, such as enhancers. Lastly, we demonstrate that utilizing model representations can improve the prioritization of functional genetic variants. The training and application of foundational models in genomics explored in this study provide a widely applicable stepping stone to bridge the gap of accurate molecular phenotype prediction from DNA sequence. Code and weights available on GitHub in Jax and HuggingFace in Pytorch. Example notebooks to apply these models to any downstream task are available on HuggingFace.}, + archiveprefix = {bioRxiv}, + chapter = {New Results}, + copyright = {{\copyright} 2024, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution-NoDerivs 4.0 International), CC BY-ND 4.0, as described at http://creativecommons.org/licenses/by-nd/4.0/}, + langid = {english}, + file = {/Users/hhakem/Zotero/storage/N99LCFXH/Dalla-Torre et al. - 2024 - The Nucleotide Transformer Building and Evaluatin.pdf} +} + +@article{dauparasRobustDeepLearning2022, + title = {Robust Deep Learning--Based Protein Sequence Design Using {{ProteinMPNN}}}, + author = {Dauparas, J. and Anishchenko, I. and Bennett, N. and Bai, H. and Ragotte, R. J. and Milles, L. F. and Wicky, B. I. M. and Courbet, A. and {de Haas}, R. J. and Bethel, N. and Leung, P. J. Y. and Huddy, T. F. and Pellock, S. and Tischer, D. and Chan, F. and Koepnick, B. and Nguyen, H. and Kang, A. and Sankaran, B. and Bera, A. K. and King, N. P. and Baker, D.}, + year = {2022}, + month = oct, + journal = {Science}, + volume = {378}, + number = {6615}, + pages = {49--56}, + publisher = {American Association for the Advancement of Science}, + doi = {10.1126/science.add2187}, + urldate = {2024-12-09}, + abstract = {Although deep learning has revolutionized protein structure prediction, almost all experimentally characterized de novo protein designs have been generated using physically based approaches such as Rosetta. Here, we describe a deep learning--based protein sequence design method, ProteinMPNN, that has outstanding performance in both in silico and experimental tests. On native protein backbones, ProteinMPNN has a sequence recovery of 52.4\% compared with 32.9\% for Rosetta. The amino acid sequence at different positions can be coupled between single or multiple chains, enabling application to a wide range of current protein design challenges. 
We demonstrate the broad utility and high accuracy of ProteinMPNN using x-ray crystallography, cryo--electron microscopy, and functional studies by rescuing previously failed designs, which were made using Rosetta or AlphaFold, of protein monomers, cyclic homo-oligomers, tetrahedral nanoparticles, and target-binding proteins.}, + file = {/Users/hhakem/Zotero/storage/CHWD3YXK/Dauparas et al. - 2022 - Robust deep learning–based protein sequence design.pdf} +} + +@misc{debDeMystifyingXGBoostII2021, + title = {De-{{Mystifying XGBoost II}}}, + author = {Deb}, + year = {2021}, + month = jan, + journal = {Medium}, + urldate = {2024-08-01}, + abstract = {Digging into the hyperparameters}, + howpublished = {https://towardsdatascience.com/de-mystifying-xgboost-part-ii-175252dcdbc5}, + langid = {english}, + file = {/Users/hhakem/Zotero/storage/2ZTXWG6M/de-mystifying-xgboost-part-ii-175252dcdbc5.html} +} + +@misc{devlinBERTPretrainingDeep2019, + title = {{{BERT}}: {{Pre-training}} of {{Deep Bidirectional Transformers}} for {{Language Understanding}}}, + shorttitle = {{{BERT}}}, + author = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina}, + year = {2019}, + month = may, + number = {arXiv:1810.04805}, + eprint = {1810.04805}, + publisher = {arXiv}, + urldate = {2024-10-20}, + abstract = {We introduce a new language representation model called BERT, which stands for Bidirectional Encoder Representations from Transformers. Unlike recent language representation models, BERT is designed to pre-train deep bidirectional representations from unlabeled text by jointly conditioning on both left and right context in all layers. As a result, the pre-trained BERT model can be fine-tuned with just one additional output layer to create state-of-the-art models for a wide range of tasks, such as question answering and language inference, without substantial task-specific architecture modifications. BERT is conceptually simple and empirically powerful. It obtains new state-of-the-art results on eleven natural language processing tasks, including pushing the GLUE score to 80.5\% (7.7\% point absolute improvement), MultiNLI accuracy to 86.7\% (4.6\% absolute improvement), SQuAD v1.1 question answering Test F1 to 93.2 (1.5 point absolute improvement) and SQuAD v2.0 Test F1 to 83.1 (5.1 point absolute improvement).}, + archiveprefix = {arXiv}, + keywords = {Computer Science - Computation and Language}, + file = {/Users/hhakem/Zotero/storage/W8BJ6XPP/Devlin et al. 
- 2019 - BERT Pre-training of Deep Bidirectional Transform.pdf;/Users/hhakem/Zotero/storage/HTGNWV49/1810.html} +} + +@misc{DifferenceAlexNetVGGNet, + title = {Difference between {{AlexNet}}, {{VGGNet}}, {{ResNet}}, and {{Inception}} {\textbar} by {{Aqeel Anwar}} {\textbar} {{Towards Data Science}}}, + urldate = {2024-08-14}, + howpublished = {https://towardsdatascience.com/the-w3h-of-alexnet-vggnet-resnet-and-inception-7baaaecccc96}, + file = {/Users/hhakem/Zotero/storage/YR2DG8PT/the-w3h-of-alexnet-vggnet-resnet-and-inception-7baaaecccc96.html} +} + +@misc{dingProteinLanguageModels2024, + title = {Protein Language Models Are Biased by Unequal Sequence Sampling across the Tree of Life}, + author = {Ding, Frances and Steinhardt, Jacob}, + year = {2024}, + month = mar, + primaryclass = {New Results}, + pages = {2024.03.07.584001}, + publisher = {bioRxiv}, + doi = {10.1101/2024.03.07.584001}, + urldate = {2024-11-03}, + abstract = {Protein language models (pLMs) trained on large protein sequence databases have been used to understand disease and design novel proteins. In design tasks, the likelihood of a protein sequence under a pLM is often used as a proxy for protein fitness, so it is critical to understand what signals likelihoods capture. In this work we find that pLM likelihoods unintentionally encode a species bias: likelihoods of protein sequences from certain species are systematically higher, independent of the protein in question. We quantify this bias and show that it arises in large part because of unequal species representation in popular protein sequence databases. We further show that the bias can be detrimental for some protein design applications, such as enhancing thermostability. These results highlight the importance of understanding and curating pLM training data to mitigate biases and improve protein design capabilities in under-explored parts of sequence space.}, + archiveprefix = {bioRxiv}, + chapter = {New Results}, + copyright = {{\copyright} 2024, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution 4.0 International), CC BY 4.0, as described at http://creativecommons.org/licenses/by/4.0/}, + langid = {english}, + file = {/Users/hhakem/Zotero/storage/YWGDIYQW/Ding and Steinhardt - 2024 - Protein language models are biased by unequal sequ.pdf} +} + +@misc{doshi-velezRigorousScienceInterpretable2017, + title = {Towards {{A Rigorous Science}} of {{Interpretable Machine Learning}}}, + author = {{Doshi-Velez}, Finale and Kim, Been}, + year = {2017}, + month = mar, + number = {arXiv:1702.08608}, + eprint = {1702.08608}, + publisher = {arXiv}, + urldate = {2024-10-20}, + abstract = {As machine learning systems become ubiquitous, there has been a surge of interest in interpretable machine learning: systems that provide explanation for their outputs. These explanations are often used to qualitatively assess other criteria such as safety or non-discrimination. However, despite the interest in interpretability, there is very little consensus on what interpretable machine learning is and how it should be measured. In this position paper, we first define interpretability and describe when interpretability is needed (and when it is not). 
Next, we suggest a taxonomy for rigorous evaluation and expose open questions towards a more rigorous science of interpretable machine learning.},
+  archiveprefix = {arXiv},
+  keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning,Statistics - Machine Learning},
+  file = {/Users/hhakem/Zotero/storage/E93GUCB9/Doshi-Velez and Kim - 2017 - Towards A Rigorous Science of Interpretable Machin.pdf;/Users/hhakem/Zotero/storage/8MWSMKUK/1702.html}
+}
+
+@article{durantFutureMachineLearning2024,
+  title = {The Future of Machine Learning for Small-Molecule Drug Discovery Will Be Driven by Data},
+  author = {Durant, Guy and Boyles, Fergus and Birchall, Kristian and Deane, Charlotte M.},
+  year = {2024},
+  month = oct,
+  journal = {Nature Computational Science},
+  volume = {4},
+  number = {10},
+  pages = {735--743},
+  publisher = {Nature Publishing Group},
+  issn = {2662-8457},
+  doi = {10.1038/s43588-024-00699-0},
+  urldate = {2024-11-03},
+  abstract = {Many studies have prophesied that the integration of machine learning techniques into small-molecule therapeutics development will help to deliver a true leap forward in drug discovery. However, increasingly advanced algorithms and novel architectures have not always yielded substantial improvements in results. In this Perspective, we propose that a greater focus on the data for training and benchmarking these models is more likely to drive future improvement, and explore avenues for future research and strategies to address these data challenges.},
+  copyright = {2024 Springer Nature America, Inc.},
+  langid = {english},
+  keywords = {Cheminformatics,Computational science,Drug screening},
+  file = {/Users/hhakem/Zotero/storage/RQKQHYGK/Durant et al. - 2024 - The future of machine learning for small-molecule .pdf}
+}
+
+@misc{ecksteinDiscriminativeAttributionCounterfactuals2021,
+  title = {Discriminative {{Attribution}} from {{Counterfactuals}}},
+  author = {Eckstein, Nils and Bates, Alexander S. and Jefferis, Gregory S. X. E. and Funke, Jan},
+  year = {2021},
+  month = sep,
+  number = {arXiv:2109.13412},
+  eprint = {2109.13412},
+  primaryclass = {cs},
+  publisher = {arXiv},
+  doi = {10.48550/arXiv.2109.13412},
+  urldate = {2024-07-03},
+  abstract = {We present a method for neural network interpretability by combining feature attribution with counterfactual explanations to generate attribution maps that highlight the most discriminative features between pairs of classes. We show that this method can be used to quantitatively evaluate the performance of feature attribution methods in an objective manner, thus preventing potential observer bias. We evaluate the proposed method on three diverse datasets, including a challenging artificial dataset and real-world biological data.
We show quantitatively and qualitatively that the highlighted features are substantially more discriminative than those extracted using conventional attribution methods and argue that this type of explanation is better suited for understanding fine grained class differences as learned by a deep neural network.}, + archiveprefix = {arXiv}, + keywords = {Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning,Read}, + file = {/Users/hhakem/Zotero/storage/XATEPJUT/Eckstein et al. - 2021 - Discriminative Attribution from Counterfactuals.pdf;/Users/hhakem/Zotero/storage/CNTSVJLD/2109.html} +} + +@misc{elliotwaitePyTorchAutogradExplained2018, + title = {{{PyTorch Autograd Explained}} - {{In-depth Tutorial}}}, + author = {{Elliot Waite}}, + year = {2018}, + month = nov, + urldate = {2024-10-07}, + abstract = {In this PyTorch tutorial, I explain how the PyTorch autograd system works by going through some examples and visualize the graphs with diagrams. As you perform operations on PyTorch tensors that have requires\_grad=True, you build up an autograd backward graph. Then when you call the backward() method on one of the output nodes, the backward graph gets traversed, starting at the node that the grad\_fn attribute of the output node points to, and traversing backwards from there, accumulating gradients until leaf nodes are reached. The final leaf node gradients will be stored on the grad attribute of the leaf tensors. This is my first PyTorch tutorial video. If you'd like to see more PyTorch related videos, let me know in the comments. And if you have anything specific about PyTorch that you would like me to make videos about, let me know. The diagrams.net flowcharts shown in the video: 🔗 https://drive.google.com/file/d/1bq3a... (Note: Click this link to go to the Google Drive file, then click the button in the top center that says "Open with diagrams.net", then once it's loaded, there will be tabs along the bottom of the diagrams.net page for all the different graphs shown in the video.) Join our Discord community: 💬 ~~/~discord~~ Connect with me: 🐦 Twitter - ~~/~elliotwaite~~ 📷 Instagram - ~~/~elliotwaite~~ 👱 Facebook - ~~/~elliotwaite~~ 💼 LinkedIn - ~~/~elliotwaite~~ 🎵 ksolis - Nobody Else (~~~{$\bullet~$}ksolis~-~Nobody~Else~~)} +} + +@misc{elnaggarAnkhOptimizedProtein2023, + title = {Ankh: {{Optimized Protein Language Model Unlocks General-Purpose Modelling}}}, + shorttitle = {Ankh}, + author = {Elnaggar, Ahmed and Essam, Hazem and {Salah-Eldin}, Wafaa and Moustafa, Walid and Elkerdawy, Mohamed and Rochereau, Charlotte and Rost, Burkhard}, + year = {2023}, + month = jan, + number = {arXiv:2301.06568}, + eprint = {2301.06568}, + publisher = {arXiv}, + doi = {10.48550/arXiv.2301.06568}, + urldate = {2024-10-31}, + abstract = {As opposed to scaling-up protein language models (PLMs), we seek improving performance via protein-specific optimization. Although the proportionality between the language model size and the richness of its learned representations is validated, we prioritize accessibility and pursue a path of data-efficient, cost-reduced, and knowledge-guided optimization. Through over twenty experiments ranging from masking, architecture, and pre-training data, we derive insights from protein-specific experimentation into building a model that interprets the language of life, optimally. 
We present Ankh, the first general-purpose PLM trained on Google's TPU-v4 surpassing the state-of-the-art performance with fewer parameters ({$<$}10\% for pre-training, {$<$}7\% for inference, and {$<$}30\% for the embedding dimension). We provide a representative range of structure and function benchmarks where Ankh excels. We further provide a protein variant generation analysis on High-N and One-N input data scales where Ankh succeeds in learning protein evolutionary conservation-mutation trends and introducing functional diversity while retaining key structural-functional characteristics. We dedicate our work to promoting accessibility to research innovation via attainable resources.}, + archiveprefix = {arXiv}, + keywords = {Computer Science - Computation and Language,Computer Science - Distributed Parallel and Cluster Computing,Computer Science - Machine Learning,Quantitative Biology - Quantitative Methods}, + file = {/Users/hhakem/Zotero/storage/N2YRH5LL/Elnaggar et al. - 2023 - Ankh Optimized Protein Language Model Unlocks Gen.pdf;/Users/hhakem/Zotero/storage/9J55RFL7/2301.html} +} + +@article{EmbeddingAIBiology2024, + title = {Embedding {{AI}} in Biology}, + year = {2024}, + month = aug, + journal = {Nature Methods}, + volume = {21}, + number = {8}, + pages = {1365--1366}, + publisher = {Nature Publishing Group}, + issn = {1548-7105}, + doi = {10.1038/s41592-024-02391-7}, + urldate = {2024-08-12}, + abstract = {Advanced artificial intelligence approaches are rapidly transforming how biological data are acquired and analyzed.}, + copyright = {2024 Springer Nature America, Inc.}, + langid = {english}, + keywords = {Cancer,Genetic engineering,Genomics,Imaging,Immunology,Machine learning,Microscopy,Neuroscience,Protein structure predictions}, + file = {/Users/hhakem/Zotero/storage/GWXEG29X/2024 - Embedding AI in biology.pdf} +} + +@misc{erdemburnpiroXAIMethodsIntegrated2022, + title = {{{XAI Methods}} --- {{Integrated Gradients}}}, + author = {Erdem (burnpiro), Kemal}, + year = {2022}, + month = apr, + journal = {Medium}, + urldate = {2024-07-03}, + abstract = {Dive into Integrated Gradients method. How the values are calculated? What are the different baselines?}, + langid = {english}, + keywords = {Feature-attribution,Read}, + file = {/Users/hhakem/Zotero/storage/F2ERGIDS/xai-methods-integrated-gradients-6ee1fe4120d8.html} +} + +@misc{ExtantFoldswitchingProteins, + title = {Extant Fold-Switching Proteins Are Widespread {\textbar} {{PNAS}}}, + urldate = {2024-11-05}, + howpublished = {https://www.pnas.org/doi/10.1073/pnas.1800168115}, + file = {/Users/hhakem/Zotero/storage/ZHYXJ57W/pnas.html} +} + +@article{faureGeneticArchitectureProtein2024, + title = {The Genetic Architecture of Protein Stability}, + author = {Faure, Andre J. and {Mart{\'i}-Aranda}, Aina and {Hidalgo-Carcedo}, Cristina and Beltran, Antoni and Schmiedel, J{\"o}rn M. and Lehner, Ben}, + year = {2024}, + month = oct, + journal = {Nature}, + volume = {634}, + number = {8035}, + pages = {995--1003}, + publisher = {Nature Publishing Group}, + issn = {1476-4687}, + doi = {10.1038/s41586-024-07966-0}, + urldate = {2024-11-17}, + abstract = {There are more ways to synthesize a 100-amino acid (aa) protein (20100) than there are atoms in the universe. Only a very small fraction of such a vast sequence space can ever be experimentally or computationally surveyed. Deep neural networks are increasingly being used to navigate high-dimensional sequence spaces1. However, these models are extremely complicated. 
Here, by experimentally sampling from sequence spaces larger than 1010, we show that the genetic architecture of at least some proteins is remarkably simple, allowing accurate genetic prediction in high-dimensional sequence spaces with fully interpretable energy models. These models capture the nonlinear relationships between free energies and phenotypes but otherwise consist of additive free energy changes with a small contribution from pairwise energetic couplings. These energetic couplings are sparse and associated with structural contacts and backbone proximity. Our results indicate that protein genetics is actually both rather simple and intelligible.}, + copyright = {2024 The Author(s)}, + langid = {english}, + keywords = {Biophysics,Computational biology and bioinformatics,Genomics}, + file = {/Users/hhakem/Zotero/storage/GVV2FQ8J/Faure et al. - 2024 - The genetic architecture of protein stability.pdf} +} + +@article{flemingCellBenderRemovebackgroundDeep, + title = {{{CellBender}} Remove-Background: A Deep Generative Model for Unsupervised Removal of Background Noise from {{scRNA-seq}} Datasets}, + author = {Fleming, Stephen J and Marioni, John C and Babadi, Mehrtash}, + abstract = {Droplet-based scRNA-seq assays are known to produce a significant amount of background RNA counts, the hallmark of which is non-zero transcript counts in presumably empty droplets. The presence of background RNA can lead to systematic biases and batch effects in various downstream analyses such as differential expression and marker gene discovery. In this paper, we explore the phenomenology and mechanisms of background RNA generation in dropletbased scRNA-seq assays and present a deep generative model of background-contaminated counts mirroring those mechanisms. The model is used for learning the background RNA profile, distinguishing cell-containing droplets from empty ones, and retrieving background-free gene expression profiles. We implement the model along with a fast and scalable inference algorithm as the remove-background module in CellBender, an open-source scRNA-seq data processing software package. Finally, we present simulations and investigations of several scRNA-seq datasets to show that processing raw data using CellBender significantly boosts the magnitude and specificity of differential expression across different cell types.}, + langid = {english}, + file = {/Users/hhakem/Zotero/storage/K2DLNUR9/Fleming et al. - CellBender remove-background a deep generative mo.pdf} +} + +@article{fleuretLittleBookDeep, + title = {The {{Little Book}} of {{Deep Learning}}}, + author = {Fleuret, Fran{\c c}ois}, + langid = {english}, + keywords = {UnRead}, + file = {/Users/hhakem/Zotero/storage/7JAPEIXX/Fleuret - The Little Book of Deep Learning.pdf} +} + +@misc{gaddWaveLSTMMultiscaleAnalysis2024, + title = {Wave-{{LSTM}}: {{Multi-scale}} Analysis of Somatic Whole Genome Copy Number Profiles}, + shorttitle = {Wave-{{LSTM}}}, + author = {Gadd, Charles and Yau, Christopher}, + year = {2024}, + month = aug, + number = {arXiv:2408.12636}, + eprint = {2408.12636}, + publisher = {arXiv}, + urldate = {2024-11-04}, + abstract = {Changes in the number of copies of certain parts of the genome, known as copy number alterations (CNAs), due to somatic mutation processes are a hallmark of many cancers. This genomic complexity is known to be associated with poorer outcomes for patients but describing its contribution in detail has been difficult. 
Copy number alterations can affect large regions spanning whole chromosomes or the entire genome itself but can also be localised to only small segments of the genome and no methods exist that allow this multi-scale nature to be quantified. In this paper, we address this using Wave-LSTM, a signal decomposition approach designed to capture the multi-scale structure of complex whole genome copy number profiles. Using wavelet-based source separation in combination with deep learning-based attention mechanisms. We show that Wave-LSTM can be used to derive multi-scale representations from copy number profiles which can be used to decipher sub-clonal structures from single-cell copy number data and to improve survival prediction performance from patient tumour profiles.}, + archiveprefix = {arXiv}, + keywords = {Computer Science - Machine Learning,Quantitative Biology - Genomics}, + file = {/Users/hhakem/Zotero/storage/5Z9LHJN9/Gadd and Yau - 2024 - Wave-LSTM Multi-scale analysis of somatic whole g.pdf;/Users/hhakem/Zotero/storage/ZW4WGDM7/2408.html} +} + +@misc{GenerativeAdversarialNetwork2019, + title = {Generative {{Adversarial Network}} ({{GAN}})}, + year = {2019}, + month = jan, + journal = {GeeksforGeeks}, + urldate = {2024-07-08}, + abstract = {A Computer Science portal for geeks. It contains well written, well thought and well explained computer science and programming articles, quizzes and practice/competitive programming/company interview Questions.}, + chapter = {Technical Scripter}, + howpublished = {https://www.geeksforgeeks.org/generative-adversarial-network-gan/}, + langid = {american}, + keywords = {Read}, + file = {/Users/hhakem/Zotero/storage/CPJ9NMD5/generative-adversarial-network-gan.html} +} + +@misc{goodfellowNIPS2016Tutorial2017, + title = {{{NIPS}} 2016 {{Tutorial}}: {{Generative Adversarial Networks}}}, + shorttitle = {{{NIPS}} 2016 {{Tutorial}}}, + author = {Goodfellow, Ian}, + year = {2017}, + month = apr, + number = {arXiv:1701.00160}, + eprint = {1701.00160}, + primaryclass = {cs}, + publisher = {arXiv}, + urldate = {2024-07-09}, + abstract = {This report summarizes the tutorial presented by the author at NIPS 2016 on generative adversarial networks (GANs). The tutorial describes: (1) Why generative modeling is a topic worth studying, (2) how generative models work, and how GANs compare to other generative models, (3) the details of how GANs work, (4) research frontiers in GANs, and (5) state-of-the-art image models that combine GANs with other methods. Finally, the tutorial contains three exercises for readers to complete, and the solutions to these exercises.}, + archiveprefix = {arXiv}, + langid = {english}, + keywords = {Computer Science - Machine Learning,UnRead}, + file = {/Users/hhakem/Zotero/storage/7TPANPGM/Goodfellow - 2017 - NIPS 2016 Tutorial Generative Adversarial Network.pdf} +} + +@misc{heuselGANsTrainedTwo2018, + title = {{{GANs Trained}} by a {{Two Time-Scale Update Rule Converge}} to a {{Local Nash Equilibrium}}}, + author = {Heusel, Martin and Ramsauer, Hubert and Unterthiner, Thomas and Nessler, Bernhard and Hochreiter, Sepp}, + year = {2018}, + month = jan, + number = {arXiv:1706.08500}, + eprint = {1706.08500}, + primaryclass = {cs, stat}, + publisher = {arXiv}, + urldate = {2024-07-08}, + abstract = {Generative Adversarial Networks (GANs) excel at creating realistic images with complex models for which maximum likelihood is infeasible. However, the convergence of GAN training has still not been proved. 
We propose a two time-scale update rule (TTUR) for training GANs with stochastic gradient descent on arbitrary GAN loss functions. TTUR has an individual learning rate for both the discriminator and the generator. Using the theory of stochastic approximation, we prove that the TTUR converges under mild assumptions to a stationary local Nash equilibrium. The convergence carries over to the popular Adam optimization, for which we prove that it follows the dynamics of a heavy ball with friction and thus prefers flat minima in the objective landscape. For the evaluation of the performance of GANs at image generation, we introduce the `Fr{\'e}chet Inception Distance'' (FID) which captures the similarity of generated images to real ones better than the Inception Score. In experiments, TTUR improves learning for DCGANs and Improved Wasserstein GANs (WGAN-GP) outperforming conventional GAN training on CelebA, CIFAR-10, SVHN, LSUN Bedrooms, and the One Billion Word Benchmark.}, + archiveprefix = {arXiv}, + langid = {english}, + keywords = {Computer Science - Machine Learning,Statistics - Machine Learning,UnRead}, + file = {/Users/hhakem/Zotero/storage/JVAAFGIV/Heusel et al. - 2018 - GANs Trained by a Two Time-Scale Update Rule Conve.pdf} +} + +@misc{hoDenoisingDiffusionProbabilistic2020, + title = {Denoising {{Diffusion Probabilistic Models}}}, + author = {Ho, Jonathan and Jain, Ajay and Abbeel, Pieter}, + year = {2020}, + month = dec, + number = {arXiv:2006.11239}, + eprint = {2006.11239}, + primaryclass = {cs, stat}, + publisher = {arXiv}, + urldate = {2024-10-17}, + abstract = {We present high quality image synthesis results using diffusion probabilistic models, a class of latent variable models inspired by considerations from nonequilibrium thermodynamics. Our best results are obtained by training on a weighted variational bound designed according to a novel connection between diffusion probabilistic models and denoising score matching with Langevin dynamics, and our models naturally admit a progressive lossy decompression scheme that can be interpreted as a generalization of autoregressive decoding. On the unconditional CIFAR10 dataset, we obtain an Inception score of 9.46 and a state-of-the-art FID score of 3.17. On 256x256 LSUN, we obtain sample quality similar to ProgressiveGAN. Our implementation is available at https://github.com/hojonathanho/diffusion.}, + archiveprefix = {arXiv}, + langid = {english}, + keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}, + file = {/Users/hhakem/Zotero/storage/E7FNITBG/Ho et al. - 2020 - Denoising Diffusion Probabilistic Models.pdf} +} + +@misc{huangArbitraryStyleTransfer2017, + title = {Arbitrary {{Style Transfer}} in {{Real-time}} with {{Adaptive Instance Normalization}}}, + author = {Huang, Xun and Belongie, Serge}, + year = {2017}, + month = jul, + number = {arXiv:1703.06868}, + eprint = {1703.06868}, + primaryclass = {cs}, + publisher = {arXiv}, + urldate = {2024-10-02}, + abstract = {Gatys et al. recently introduced a neural algorithm that renders a content image in the style of another image, achieving so-called style transfer. However, their framework requires a slow iterative optimization process, which limits its practical application. Fast approximations with feed-forward neural networks have been proposed to speed up neural style transfer. Unfortunately, the speed improvement comes at a cost: the network is usually tied to a fixed set of styles and cannot adapt to arbitrary new styles. 
In this paper, we present a simple yet effective approach that for the first time enables arbitrary style transfer in real-time. At the heart of our method is a novel adaptive instance normalization (AdaIN) layer that aligns the mean and variance of the content features with those of the style features. Our method achieves speed comparable to the fastest existing approach, without the restriction to a pre-defined set of styles. In addition, our approach allows flexible user controls such as content-style trade-off, style interpolation, color \& spatial controls, all using a single feed-forward neural network.},
+  archiveprefix = {arXiv},
+  langid = {english},
+  keywords = {Computer Science - Computer Vision and Pattern Recognition},
+  file = {/Users/hhakem/Zotero/storage/9TJG5L25/Huang and Belongie - 2017 - Arbitrary Style Transfer in Real-time with Adaptiv.pdf}
+}
+
+@misc{ilanchezianGeneratingRealisticCounterfactuals2023,
+  title = {Generating {{Realistic Counterfactuals}} for {{Retinal Fundus}} and {{OCT Images}} Using {{Diffusion Models}}},
+  author = {Ilanchezian, Indu and Boreiko, Valentyn and K{\"u}hlewein, Laura and Huang, Ziwei and Ayhan, Murat Se{\c c}kin and Hein, Matthias and Koch, Lisa and Berens, Philipp},
+  year = {2023},
+  month = dec,
+  number = {arXiv:2311.11629},
+  eprint = {2311.11629},
+  primaryclass = {cs},
+  publisher = {arXiv},
+  urldate = {2024-10-10},
+  abstract = {Counterfactual reasoning is often used in clinical settings to explain decisions or weigh alternatives. Therefore, for imaging based specialties such as ophthalmology, it would be beneficial to be able to create counterfactual images, illustrating answers to questions like ''If the subject had had diabetic retinopathy, how would the fundus image have looked?''. Here, we demonstrate that using a diffusion model in combination with an adversarially robust classifier trained on retinal disease classification tasks enables the generation of highly realistic counterfactuals of retinal fundus images and optical coherence tomography (OCT) B-scans. The key to the realism of counterfactuals is that these classifiers encode salient features indicative for each disease class and can steer the diffusion model to depict disease signs or remove disease-related lesions in a realistic way. In a user study, domain experts also found the counterfactuals generated using our method significantly more realistic than counterfactuals generated from a previous method, and even indistinguishable from real images.},
+  archiveprefix = {arXiv},
+  langid = {english},
+  keywords = {Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning},
+  file = {/Users/hhakem/Zotero/storage/KY5UU97M/Ilanchezian et al. - 2023 - Generating Realistic Counterfactuals for Retinal F.pdf}
+}
+
+@misc{isolaImagetoImageTranslationConditional2018,
+  title = {Image-to-{{Image Translation}} with {{Conditional Adversarial Networks}}},
+  author = {Isola, Phillip and Zhu, Jun-Yan and Zhou, Tinghui and Efros, Alexei A.},
+  year = {2018},
+  month = nov,
+  number = {arXiv:1611.07004},
+  eprint = {1611.07004},
+  primaryclass = {cs},
+  publisher = {arXiv},
+  urldate = {2024-07-08},
+  abstract = {We investigate conditional adversarial networks as a general-purpose solution to image-to-image translation problems. These networks not only learn the mapping from input image to output image, but also learn a loss function to train this mapping. This makes it possible to apply the same generic approach to problems that traditionally would require very different loss formulations. We demonstrate that this approach is effective at synthesizing photos from label maps, reconstructing objects from edge maps, and colorizing images, among other tasks. Indeed, since the release of the pix2pix software associated with this paper, a large number of internet users (many of them artists) have posted their own experiments with our system, further demonstrating its wide applicability and ease of adoption without the need for parameter tweaking. As a community, we no longer hand-engineer our mapping functions, and this work suggests we can achieve reasonable results without hand-engineering our loss functions either.},
+  archiveprefix = {arXiv},
+  langid = {english},
+  keywords = {Computer Science - Computer Vision and Pattern Recognition,Read},
+  file = {/Users/hhakem/Zotero/storage/E3B2CAFV/Isola et al.
- 2018 - Image-to-Image Translation with Conditional Advers.pdf} +} + +@misc{izmailovAveragingWeightsLeads2019, + title = {Averaging {{Weights Leads}} to {{Wider Optima}} and {{Better Generalization}}}, + author = {Izmailov, Pavel and Podoprikhin, Dmitrii and Garipov, Timur and Vetrov, Dmitry and Wilson, Andrew Gordon}, + year = {2019}, + month = feb, + number = {arXiv:1803.05407}, + eprint = {1803.05407}, + primaryclass = {cs, stat}, + publisher = {arXiv}, + doi = {10.48550/arXiv.1803.05407}, + urldate = {2024-09-03}, + abstract = {Deep neural networks are typically trained by optimizing a loss function with an SGD variant, in conjunction with a decaying learning rate, until convergence. We show that simple averaging of multiple points along the trajectory of SGD, with a cyclical or constant learning rate, leads to better generalization than conventional training. We also show that this Stochastic Weight Averaging (SWA) procedure finds much flatter solutions than SGD, and approximates the recent Fast Geometric Ensembling (FGE) approach with a single model. Using SWA we achieve notable improvement in test accuracy over conventional SGD training on a range of state-of-the-art residual networks, PyramidNets, DenseNets, and Shake-Shake networks on CIFAR-10, CIFAR-100, and ImageNet. In short, SWA is extremely easy to implement, improves generalization, and has almost no computational overhead.}, + archiveprefix = {arXiv}, + keywords = {Computer Science - Artificial Intelligence,Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning,Statistics - Machine Learning}, + file = {/Users/hhakem/Zotero/storage/LZWGEFIZ/Izmailov et al. - 2019 - Averaging Weights Leads to Wider Optima and Better.pdf;/Users/hhakem/Zotero/storage/WL923B9D/1803.html} +} + +@misc{johnsonPerceptualLossesRealTime2016, + title = {Perceptual {{Losses}} for {{Real-Time Style Transfer}} and {{Super-Resolution}}}, + author = {Johnson, Justin and Alahi, Alexandre and {Fei-Fei}, Li}, + year = {2016}, + month = mar, + number = {arXiv:1603.08155}, + eprint = {1603.08155}, + primaryclass = {cs}, + publisher = {arXiv}, + urldate = {2024-07-09}, + abstract = {We consider image transformation problems, where an input image is transformed into an output image. Recent methods for such problems typically train feed-forward convolutional neural networks using a per-pixel loss between the output and ground-truth images. Parallel work has shown that high-quality images can be generated by defining and optimizing perceptual loss functions based on high-level features extracted from pretrained networks. We combine the benefits of both approaches, and propose the use of perceptual loss functions for training feed-forward networks for image transformation tasks. We show results on image style transfer, where a feed-forward network is trained to solve the optimization problem proposed by Gatys et al in real-time. Compared to the optimization-based method, our network gives similar qualitative results but is three orders of magnitude faster. We also experiment with single-image super-resolution, where replacing a per-pixel loss with a perceptual loss gives visually pleasing results.}, + archiveprefix = {arXiv}, + langid = {english}, + keywords = {Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning,Read}, + file = {/Users/hhakem/Zotero/storage/MF2XZ5U7/Johnson et al. 
- Perceptual Losses for Real-Time Style Transfer and.pdf;/Users/hhakem/Zotero/storage/ZHNZPAJY/Johnson et al. - 2016 - Perceptual Losses for Real-Time Style Transfer and.pdf} +} + +@article{juFederatedLearningPredicting2024, + title = {Federated Learning for Predicting Compound Mechanism of Action Based on Image-Data from Cell Painting}, + author = {Ju, Li and Hellander, Andreas and Spjuth, Ola}, + year = {2024}, + month = jun, + journal = {Artificial Intelligence in the Life Sciences}, + volume = {5}, + pages = {100098}, + issn = {2667-3185}, + doi = {10.1016/j.ailsci.2024.100098}, + urldate = {2024-07-16}, + abstract = {Having access to sufficient data is essential in order to train accurate machine learning models, but much data is not publicly available. In drug discovery this is particularly evident, as much data is withheld at pharmaceutical companies for various reasons. Federated Learning (FL) aims at training a joint model between multiple parties but without disclosing data between the parties. In this work, we leverage Federated Learning to predict compound Mechanism of Action (MoA) using fluorescence image data from cell painting. Our study evaluates the effectiveness and efficiency of FL, comparing to non-collaborative and data-sharing collaborative learning in diverse scenarios. Specifically, we investigate the impact of data heterogeneity across participants on MoA prediction, an essential concern in real-life applications of FL, and demonstrate the benefits for all involved parties. This work highlights the potential of federated learning in multi-institutional collaborative machine learning for drug discovery and assessment of chemicals, offering a promising avenue to overcome data-sharing constraints.}, + keywords = {Artificial intelligence,Cell painting,Cell profiling,Collaborative learning,Federated learning}, + file = {/Users/hhakem/Zotero/storage/F7RMWVN3/Ju et al. - 2024 - Federated learning for predicting compound mechani.pdf;/Users/hhakem/Zotero/storage/MPJT8HBK/S2667318524000059.html} +} + +@misc{kalamkarStudyBFLOAT16Deep2019, + title = {A {{Study}} of {{BFLOAT16}} for {{Deep Learning Training}}}, + author = {Kalamkar, Dhiraj and Mudigere, Dheevatsa and Mellempudi, Naveen and Das, Dipankar and Banerjee, Kunal and Avancha, Sasikanth and Vooturi, Dharma Teja and Jammalamadaka, Nataraj and Huang, Jianyu and Yuen, Hector and Yang, Jiyan and Park, Jongsoo and Heinecke, Alexander and Georganas, Evangelos and Srinivasan, Sudarshan and Kundu, Abhisek and Smelyanskiy, Misha and Kaul, Bharat and Dubey, Pradeep}, + year = {2019}, + month = jun, + number = {arXiv:1905.12322}, + eprint = {1905.12322}, + primaryclass = {cs, stat}, + publisher = {arXiv}, + urldate = {2024-10-11}, + abstract = {This paper presents the first comprehensive empirical study demonstrating the efficacy of the Brain Floating Point (BFLOAT16) half-precision format for Deep Learning training across image classification, speech recognition, language modeling, generative networks and industrial recommendation systems. BFLOAT16 is attractive for Deep Learning training for two reasons: the range of values it can represent is the same as that of IEEE 754 floating-point format (FP32) and conversion to/from FP32 is simple. Maintaining the same range as FP32 is important to ensure that no hyper-parameter tuning is required for convergence; e.g., IEEE 754 compliant half-precision floating point (FP16) requires hyper-parameter tuning. 
In this paper, we discuss the flow of tensors and various key operations in mixed precision training, and delve into details of operations, such as the rounding modes for converting FP32 tensors to BFLOAT16. We have implemented a method to emulate BFLOAT16 operations in Tensorflow, Caffe2, IntelCaffe, and Neon for our experiments. Our results show that deep learning training using BFLOAT16 tensors achieves the same state-of-the-art (SOTA) results across domains as FP32 tensors in the same number of iterations and with no changes to hyper-parameters.}, + archiveprefix = {arXiv}, + langid = {english}, + keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}, + file = {/Users/hhakem/Zotero/storage/GHIM8QIX/Kalamkar et al. - 2019 - A Study of BFLOAT16 for Deep Learning Training.pdf} +} + +@misc{kalininVersatileInformationRetrieval2024, + title = {A Versatile Information Retrieval Framework for Evaluating Profile Strength and Similarity}, + author = {Kalinin, Alexandr A. and Arevalo, John and Vulliard, Loan and Serrano, Erik and Tsang, Hillary and Bornholdt, Michael and Rajwa, Bartek and Carpenter, Anne E. and Way, Gregory P. and Singh, Shantanu}, + year = {2024}, + month = apr, + primaryclass = {New Results}, + pages = {2024.04.01.587631}, + publisher = {bioRxiv}, + doi = {10.1101/2024.04.01.587631}, + urldate = {2024-07-03}, + abstract = {In profiling assays, thousands of biological properties are measured in a single test, yielding biological discoveries by capturing the state of a cell population, often at the single-cell level. However, for profiling datasets, it has been challenging to evaluate the phenotypic activity of a sample and the phenotypic consistency among samples, due to profiles' high dimensionality, heterogeneous nature, and non-linear properties. Existing methods leave researchers uncertain where to draw boundaries between meaningful biological response and technical noise. Here, we developed a statistical framework that uses the well-established mean average precision (mAP) as a single, data-driven metric to bridge this gap. We validated the mAP framework against established metrics through simulations and real-world data applications, revealing its ability to capture subtle and meaningful biological differences in cell state. Specifically, we used mAP to assess both phenotypic activity for a given perturbation (or a sample) as well as consistency within groups of perturbations (or samples) across diverse high-dimensional datasets. We evaluated the framework on different profile types (image, protein, and mRNA profiles), perturbation types (CRISPR gene editing, gene overexpression, and small molecules), and profile resolutions (single-cell and bulk). Our open-source software allows this framework to be applied to identify interesting biological phenomena and promising therapeutics from large-scale profiling data.}, + archiveprefix = {bioRxiv}, + chapter = {New Results}, + copyright = {{\copyright} 2024, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution 4.0 International), CC BY 4.0, as described at http://creativecommons.org/licenses/by/4.0/}, + langid = {english}, + keywords = {Read}, + file = {/Users/hhakem/Zotero/storage/R8HUS4T9/Kalinin et al. 
- 2024 - A versatile information retrieval framework for ev.pdf}
+}
+
+@misc{karrasTrainingGenerativeAdversarial2020,
+  title = {Training {{Generative Adversarial Networks}} with {{Limited Data}}},
+  author = {Karras, Tero and Aittala, Miika and Hellsten, Janne and Laine, Samuli and Lehtinen, Jaakko and Aila, Timo},
+  year = {2020},
+  month = oct,
+  number = {arXiv:2006.06676},
+  eprint = {2006.06676},
+  primaryclass = {cs, stat},
+  publisher = {arXiv},
+  urldate = {2024-10-10},
+  abstract = {Training generative adversarial networks (GAN) using too little data typically leads to discriminator overfitting, causing training to diverge. We propose an adaptive discriminator augmentation mechanism that significantly stabilizes training in limited data regimes. The approach does not require changes to loss functions or network architectures, and is applicable both when training from scratch and when fine-tuning an existing GAN on another dataset. We demonstrate, on several datasets, that good results are now possible using only a few thousand training images, often matching StyleGAN2 results with an order of magnitude fewer images. We expect this to open up new application domains for GANs. We also find that the widely used CIFAR-10 is, in fact, a limited data benchmark, and improve the record FID from 5.59 to 2.42.},
+  archiveprefix = {arXiv},
+  langid = {english},
+  keywords = {Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning,Computer Science - Neural and Evolutionary Computing,Statistics - Machine Learning},
+  file = {/Users/hhakem/Zotero/storage/MU78J2RH/Karras et al. - 2020 - Training Generative Adversarial Networks with Limi.pdf}
+}
+
+@article{khojaste-sarakhsi3DMultiscaleCycleGAN2024,
+  title = {A {{3D}} Multi-scale {{CycleGAN}} Framework for Generating Synthetic {{PETs}} from {{MRIs}} for {{Alzheimer}}'s Disease Diagnosis},
+  author = {{Khojaste-Sarakhsi}, M. and Haghighi, Seyedhamidreza Shahabi and Ghomi, S. M. T. Fatemi and Marchiori, Elena},
+  year = {2024},
+  month = jun,
+  journal = {Image and Vision Computing},
+  volume = {146},
+  pages = {105017},
+  issn = {0262-8856},
+  doi = {10.1016/j.imavis.2024.105017},
+  urldate = {2024-07-09},
+  abstract = {This paper proposes a novel framework for generating synthesized PET images from MRIs to fill in missing PETs and help with Alzheimer's disease (AD) diagnosis. This framework employs a 3D multi-scale image-to-image CycleGAN architecture for the end-to-end translation of MRI and PET domains together. A hybrid loss function is also proposed to enforce structural similarity while preserving voxel-wise similarity and avoiding blurry images. As shown by the quantitative and visual assessment of the synthesized PETs, this framework is superior to the state-of-the-art. Moreover, using these synthesized PETs helps improve the ternary classification of AD subjects (AD vs. MCI vs. NC). Specifically, assuming an extreme case where none of the subjects has a PET, feeding the classifier with MRIs and their corresponding synthetic PETs results in a more accurate diagnosis than feeding it with just available MRIs. Accordingly, the proposed framework can help improve AD diagnosis, which is the final goal of the current study. Ablation investigation of the proposed multi-scale framework as well as the proposed loss function, is also conducted to study their contribution to the quality of synthesized PETs. Furthermore, other factors, such as stopping criteria, the type of normalization layer, the activation function, and dropouts, are examined, concluding that the appropriate use of these factors can significantly improve the quality of synthesized PETs.},
+  keywords = {3D image-to-image translation,Alzheimer's Disease diagnosis,Cycle GAN,Image synthesis,Multi-scale GAN,UnRead},
+  file = {/Users/hhakem/Zotero/storage/7I6HJPU9/S0262885624001215.html}
+}
+
+@article{koeppelEngineeringStructuralVariants2024,
+  title = {Engineering Structural Variants to Interrogate Genome Function},
+  author = {Koeppel, Jonas and Weller, Juliane and Vanderstichele, Thomas and Parts, Leopold},
+  year = {2024},
+  month = nov,
+  journal = {Nature Genetics},
+  pages = {1--13},
+  publisher = {Nature Publishing Group},
+  issn = {1546-1718},
+  doi = {10.1038/s41588-024-01981-7},
+  urldate = {2024-11-17},
+  abstract = {Structural variation, such as deletions, duplications, inversions and complex rearrangements, can have profound effects on gene expression, genome stability, phenotypic diversity and disease susceptibility. Structural variants can encompass up to millions of bases and have the potential to rearrange substantial segments of the genome. They contribute considerably more to genetic diversity in human populations and have larger effects on phenotypic traits than point mutations. Until recently, our understanding of the effects of structural variants was driven mainly by studying naturally occurring variation.
New genome-engineering tools capable of generating deletions, insertions, inversions and translocations, together with the discovery of new recombinases and advances in creating synthetic DNA constructs, now enable the design and generation of an extended range of structural variation. Here, we discuss these tools and examples of their application and highlight existing challenges that will need to be overcome to fully harness their potential.}, + copyright = {2024 Springer Nature America, Inc.}, + langid = {english}, + keywords = {Biomedical engineering,Clinical genetics,Genetic engineering,Genetics research,Genomics}, + file = {/Users/hhakem/Zotero/storage/W9GKUSVM/Koeppel et al. - 2024 - Engineering structural variants to interrogate gen.pdf} +} + +@misc{kohUnderstandingBlackboxPredictions2020, + title = {Understanding {{Black-box Predictions}} via {{Influence Functions}}}, + author = {Koh, Pang Wei and Liang, Percy}, + year = {2020}, + month = dec, + number = {arXiv:1703.04730}, + eprint = {1703.04730}, + publisher = {arXiv}, + urldate = {2024-11-02}, + abstract = {How can we explain the predictions of a black-box model? In this paper, we use influence functions -- a classic technique from robust statistics -- to trace a model's prediction through the learning algorithm and back to its training data, thereby identifying training points most responsible for a given prediction. To scale up influence functions to modern machine learning settings, we develop a simple, efficient implementation that requires only oracle access to gradients and Hessian-vector products. We show that even on non-convex and non-differentiable models where the theory breaks down, approximations to influence functions can still provide valuable information. On linear models and convolutional neural networks, we demonstrate that influence functions are useful for multiple purposes: understanding model behavior, debugging models, detecting dataset errors, and even creating visually-indistinguishable training-set attacks.}, + archiveprefix = {arXiv}, + keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning,Statistics - Machine Learning}, + file = {/Users/hhakem/Zotero/storage/XQILPYEU/Koh and Liang - 2020 - Understanding Black-box Predictions via Influence .pdf;/Users/hhakem/Zotero/storage/G9PGVZA6/1703.html} +} + +@article{kramerNonadditivityAnalysis2019, + title = {Nonadditivity {{Analysis}}}, + author = {Kramer, Christian}, + year = {2019}, + month = sep, + journal = {Journal of Chemical Information and Modeling}, + volume = {59}, + number = {9}, + pages = {4034--4042}, + publisher = {American Chemical Society}, + issn = {1549-9596}, + doi = {10.1021/acs.jcim.9b00631}, + urldate = {2024-08-06}, + abstract = {We introduce the statistics behind a novel type of SAR analysis named ``nonadditivity analysis''. On the basis of all pairs of matched pairs within a given data set, the approach analyzes whether the same transformations between related molecules have the same effect, i.e., whether they are additive. Assuming that the experimental uncertainty is normally distributed, the additivities can be analyzed with statistical rigor and sets of compounds can be found that show significant nonadditivity. Nonadditivity analysis can not only detect nonadditivity, potential SAR outliers, and sets of key compounds but also allow estimating an upper limit of the experimental uncertainty in the data set. 
We demonstrate how complex SAR features that inform medicinal chemistry can be found in large SAR data sets. Finally, we show how the upper limit of experimental uncertainty for a given biochemical assay can be estimated without the need for repeated measurements of the same protein--ligand system.}, + file = {/Users/hhakem/Zotero/storage/6D6E8BNS/Kramer - 2019 - Nonadditivity Analysis.pdf} +} + +@misc{kulyteImprovingAntibodyDesign2024, + title = {Improving {{Antibody Design}} with {{Force-Guided Sampling}} in {{Diffusion Models}}}, + author = {Kulyt{\.e}, Paulina and Vargas, Francisco and Mathis, Simon Valentin and Wang, Yu Guang and {Hern{\'a}ndez-Lobato}, Jos{\'e} Miguel and Li{\`o}, Pietro}, + year = {2024}, + month = sep, + number = {arXiv:2406.05832}, + eprint = {2406.05832}, + publisher = {arXiv}, + doi = {10.48550/arXiv.2406.05832}, + urldate = {2024-11-15}, + abstract = {Antibodies, crucial for immune defense, primarily rely on complementarity-determining regions (CDRs) to bind and neutralize antigens, such as viruses. The design of these CDRs determines the antibody's affinity and specificity towards its target. Generative models, particularly denoising diffusion probabilistic models (DDPMs), have shown potential to advance the structure-based design of CDR regions. However, only a limited dataset of bound antibody-antigen structures is available, and generalization to out-of-distribution interfaces remains a challenge. Physics based force-fields, which approximate atomic interactions, offer a coarse but universal source of information to better mold designs to target interfaces. Integrating this foundational information into diffusion models is, therefore, highly desirable. Here, we propose a novel approach to enhance the sampling process of diffusion models by integrating force field energy-based feedback. Our model, DiffForce, employs forces to guide the diffusion sampling process, effectively blending the two distributions. Through extensive experiments, we demonstrate that our method guides the model to sample CDRs with lower energy, enhancing both the structure and sequence of the generated antibodies.}, + archiveprefix = {arXiv}, + keywords = {Computer Science - Machine Learning,Quantitative Biology - Biomolecules,Quantitative Biology - Quantitative Methods}, + file = {/Users/hhakem/Zotero/storage/I35A9CLF/Kulytė et al. - 2024 - Improving Antibody Design with Force-Guided Sampli.pdf;/Users/hhakem/Zotero/storage/XHTLA776/2406.html} +} + +@article{lamiableRevealingInvisibleCell2023, + title = {Revealing Invisible Cell Phenotypes with Conditional Generative Modeling}, + author = {Lamiable, Alexis and Champetier, Tiphaine and Leonardi, Francesco and Cohen, Ethan and Sommer, Peter and Hardy, David and Argy, Nicolas and Massougbodji, Achille and Del Nery, Elaine and Cottrell, Gilles and Kwon, Yong-Jun and Genovesio, Auguste}, + year = {2023}, + month = oct, + journal = {Nature Communications}, + volume = {14}, + number = {1}, + pages = {6386}, + publisher = {Nature Publishing Group}, + issn = {2041-1723}, + doi = {10.1038/s41467-023-42124-6}, + urldate = {2024-07-03}, + abstract = {Biological sciences, drug discovery and medicine rely heavily on cell phenotype perturbation and microscope observation. However, most cellular phenotypic changes are subtle and thus hidden from us by natural cell variability: two cells in the same condition already look different. 
In this study, we show that conditional generative models can be used to transform an image of cells from any one condition to another, thus canceling cell variability. We visually and quantitatively validate that the principle of synthetic cell perturbation works on discernible cases. We then illustrate its effectiveness in displaying otherwise invisible cell phenotypes triggered by blood cells under parasite infection, or by the presence of a disease-causing pathological mutation in differentiated neurons derived from iPSCs, or by low concentration drug treatments. The proposed approach, easy to use and robust, opens the door to more accessible discovery of biological and disease biomarkers.}, + copyright = {2023 The Author(s)}, + langid = {english}, + keywords = {Biomarkers,Cellular imaging,Image processing,Organelles,Read}, + file = {/Users/hhakem/Zotero/storage/C6PQ7R4P/Lamiable et al. - 2023 - Revealing invisible cell phenotypes with condition.pdf} +} + +@misc{LectureVideosMachine, + title = {Lecture {{Videos}} {\textbar} {{Machine Learning}} for {{Healthcare}} {\textbar} {{Electrical Engineering}} and {{Computer Science}}}, + journal = {MIT OpenCourseWare}, + urldate = {2024-10-18}, + abstract = {Full video lectures for for 6.S897 Machine Learning for Healthcare.}, + howpublished = {https://ocw.mit.edu/courses/6-s897-machine-learning-for-healthcare-spring-2019/video\_galleries/lecture-videos/}, + langid = {english}, + file = {/Users/hhakem/Zotero/storage/8CEN9YXG/lecture-videos.html} +} + +@article{lobentanzerMolecularCausalityAdvent2024, + title = {Molecular Causality in the Advent of Foundation Models}, + author = {Lobentanzer, Sebastian and {Rodriguez-Mier}, Pablo and Bauer, Stefan and {Saez-Rodriguez}, Julio}, + year = {2024}, + month = aug, + journal = {Molecular systems biology}, + volume = {20}, + number = {8}, + pages = {848--858}, + issn = {1744-4292}, + doi = {10.1038/s44320-024-00041-w}, + urldate = {2024-11-04}, + abstract = {Correlation is not causation: this simple and uncontroversial statement has far-reaching implications. Defining and applying causality in biomedical research has posed significant challenges to the scientific community. In this perspective, we attempt to connect the partly disparate fields of systems biology, causal reasoning, and machine learning to inform future approaches in the field of systems biology and molecular medicine.}, + copyright = {cc by}, + langid = {english}, + pmcid = {PMC11297329}, + pmid = {38890548}, + keywords = {Causality,Foundation Models,Inductive Bias,Latent Spaces,systems biology}, + file = {/Users/hhakem/Zotero/storage/ZLKETHRK/Lobentanzer et al. 
- 2024 - Molecular causality in the advent of foundation mo.pdf} +} + +@article{lotfollahiPredictingCellularResponses2023, + title = {Predicting Cellular Responses to Complex Perturbations in High-throughput Screens}, + author = {Lotfollahi, Mohammad and Klimovskaia Susmelj, Anna and De Donno, Carlo and Hetzel, Leon and Ji, Yuge and Ibarra, Ignacio L and Srivatsan, Sanjay R and Naghipourfar, Mohsen and Daza, Riza M and Martin, Beth and Shendure, Jay and McFaline-Figueroa, Jose L and Boyeau, Pierre and Wolf, F Alexander and Yakubova, Nafissa and G{\"u}nnemann, Stephan and Trapnell, Cole and Lopez-Paz, David and Theis, Fabian J}, + year = {2023}, + month = jun, + journal = {Molecular Systems Biology}, + volume = {19}, + number = {6}, + pages = {e11517}, + publisher = {John Wiley \& Sons, Ltd}, + issn = {1744-4292}, + doi = {10.15252/msb.202211517}, + urldate = {2024-10-17}, + abstract = {Recent advances in multiplexed single-cell transcriptomics experiments facilitate the high-throughput study of drug and genetic perturbations. However, an exhaustive exploration of the combinatorial perturbation space is experimentally unfeasible. Therefore, computational methods are needed to predict, interpret, and prioritize perturbations. Here, we present the compositional perturbation autoencoder (CPA), which combines the interpretability of linear models with the flexibility of deep-learning approaches for single-cell response modeling. CPA learns to in silico predict transcriptional perturbation response at the single-cell level for unseen dosages, cell types, time points, and species. Using newly generated single-cell drug combination data, we validate that CPA can predict unseen drug combinations while outperforming baseline models. Additionally, the architecture's modularity enables incorporating the chemical representation of the drugs, allowing the prediction of cellular response to completely unseen drugs. Furthermore, CPA is also applicable to genetic combinatorial screens. We demonstrate this by imputing in silico 5,329 missing combinations (97.6\% of all possibilities) in a single-cell Perturb-seq experiment with diverse genetic interactions. We envision CPA will facilitate efficient experimental design and hypothesis generation by enabling in silico response prediction at the single-cell level and thus accelerate therapeutic applications using single-cell technologies.}, + keywords = {generative modeling,high-throughput screening,machine learning,perturbation prediction,single-cell transcriptomics}, + file = {/Users/hhakem/Zotero/storage/S6S2ZBTF/Lotfollahi et al. - 2023 - Predicting cellular responses to complex perturbat.pdf} +} + +@article{loveModeratedEstimationFold2014, + title = {Moderated Estimation of Fold Change and Dispersion for {{RNA-seq}} Data with {{DESeq2}}}, + author = {Love, Michael I. and Huber, Wolfgang and Anders, Simon}, + year = {2014}, + month = dec, + journal = {Genome Biology}, + volume = {15}, + number = {12}, + pages = {550}, + issn = {1474-760X}, + doi = {10.1186/s13059-014-0550-8}, + urldate = {2024-11-04}, + abstract = {In comparative high-throughput sequencing assays, a fundamental task is the analysis of count data, such as read counts per gene in RNA-seq, for evidence of systematic changes across experimental conditions. Small replicate numbers, discreteness, large dynamic range and the presence of outliers require a suitable statistical approach. 
We present DESeq2, a method for differential analysis of count data, using shrinkage estimation for dispersions and fold changes to improve stability and interpretability of estimates. This enables a more quantitative analysis focused on the strength rather than the mere presence of differential expression. The DESeq2 package is available at http://www.bioconductor.org/packages/release/bioc/html/DESeq2.html.}, + langid = {english}, + keywords = {DESeq2 Package,Differential Expression Analysis,Negative Binomial Generalize Linear Model,Observe Fisher Information,Read Count}, + file = {/Users/hhakem/Zotero/storage/5MU9HUQA/Love et al. - 2014 - Moderated estimation of fold change and dispersion.pdf} +} + +@article{luckReferenceMapHuman2020, + title = {A Reference Map of the Human Binary Protein Interactome}, + author = {Luck, Katja and Kim, Dae-Kyum and Lambourne, Luke and Spirohn, Kerstin and Begg, Bridget E. and Bian, Wenting and Brignall, Ruth and Cafarelli, Tiziana and {Campos-Laborie}, Francisco J. and Charloteaux, Benoit and Choi, Dongsic and Cot{\'e}, Atina G. and Daley, Meaghan and Deimling, Steven and Desbuleux, Alice and Dricot, Am{\'e}lie and Gebbia, Marinella and Hardy, Madeleine F. and Kishore, Nishka and Knapp, Jennifer J. and Kov{\'a}cs, Istv{\'a}n A. and Lemmens, Irma and Mee, Miles W. and Mellor, Joseph C. and Pollis, Carl and Pons, Carles and Richardson, Aaron D. and Schlabach, Sadie and Teeking, Bridget and Yadav, Anupama and Babor, Mariana and Balcha, Dawit and Basha, Omer and {Bowman-Colin}, Christian and Chin, Suet-Feung and Choi, Soon Gang and Colabella, Claudia and Coppin, Georges and D'Amata, Cassandra and De Ridder, David and De Rouck, Steffi and {Duran-Frigola}, Miquel and Ennajdaoui, Hanane and Goebels, Florian and Goehring, Liana and Gopal, Anjali and Haddad, Ghazal and Hatchi, Elodie and Helmy, Mohamed and Jacob, Yves and Kassa, Yoseph and Landini, Serena and Li, Roujia and {van Lieshout}, Natascha and MacWilliams, Andrew and Markey, Dylan and Paulson, Joseph N. and Rangarajan, Sudharshan and Rasla, John and Rayhan, Ashyad and Rolland, Thomas and {San-Miguel}, Adriana and Shen, Yun and Sheykhkarimli, Dayag and Sheynkman, Gloria M. and Simonovsky, Eyal and Ta{\c s}an, Murat and Tejeda, Alexander and Tropepe, Vincent and Twizere, Jean-Claude and Wang, Yang and Weatheritt, Robert J. and Weile, Jochen and Xia, Yu and Yang, Xinping and {Yeger-Lotem}, Esti and Zhong, Quan and Aloy, Patrick and Bader, Gary D. and De Las Rivas, Javier and Gaudet, Suzanne and Hao, Tong and Rak, Janusz and Tavernier, Jan and Hill, David E. and Vidal, Marc and Roth, Frederick P. and Calderwood, Michael A.}, + year = {2020}, + month = apr, + journal = {Nature}, + volume = {580}, + number = {7803}, + pages = {402--408}, + publisher = {Nature Publishing Group}, + issn = {1476-4687}, + doi = {10.1038/s41586-020-2188-x}, + urldate = {2024-07-23}, + abstract = {Global insights into cellular organization and genome function require comprehensive understanding of the interactome networks that mediate genotype--phenotype relationships1,2. Here we present a human `all-by-all' reference interactome map of human binary protein interactions, or `HuRI'. With approximately 53,000 protein--protein interactions, HuRI has approximately four times as many such interactions as there are high-quality curated interactions from small-scale studies. 
The integration of HuRI with genome3, transcriptome4 and proteome5 data enables cellular function to be studied within most physiological or pathological cellular contexts. We demonstrate the utility of HuRI in identifying the specific subcellular roles of protein--protein interactions. Inferred tissue-specific networks reveal general principles for the formation of cellular context-specific functions and elucidate potential molecular mechanisms that might underlie tissue-specific phenotypes of Mendelian diseases. HuRI is a systematic proteome-wide reference that links genomic variation to phenotypic outcomes.}, + copyright = {2020 The Author(s), under exclusive licence to Springer Nature Limited}, + langid = {english}, + keywords = {Biochemical networks,Data integration,High-throughput screening}, + file = {/Users/hhakem/Zotero/storage/P7WHWID5/Luck et al. - 2020 - A reference map of the human binary protein intera.pdf} +} + +@misc{makelovPrincipledEvaluationsSparse2024, + title = {Towards {{Principled Evaluations}} of {{Sparse Autoencoders}} for {{Interpretability}} and {{Control}}}, + author = {Makelov, Aleksandar and Lange, George and Nanda, Neel}, + year = {2024}, + month = may, + number = {arXiv:2405.08366}, + eprint = {2405.08366}, + publisher = {arXiv}, + urldate = {2024-11-02}, + abstract = {Disentangling model activations into meaningful features is a central problem in interpretability. However, the absence of ground-truth for these features in realistic scenarios makes validating recent approaches, such as sparse dictionary learning, elusive. To address this challenge, we propose a framework for evaluating feature dictionaries in the context of specific tasks, by comparing them against {\textbackslash}emph\{supervised\} feature dictionaries. First, we demonstrate that supervised dictionaries achieve excellent approximation, control, and interpretability of model computations on the task. Second, we use the supervised dictionaries to develop and contextualize evaluations of unsupervised dictionaries along the same three axes. We apply this framework to the indirect object identification (IOI) task using GPT-2 Small, with sparse autoencoders (SAEs) trained on either the IOI or OpenWebText datasets. We find that these SAEs capture interpretable features for the IOI task, but they are less successful than supervised features in controlling the model. Finally, we observe two qualitative phenomena in SAE training: feature occlusion (where a causally relevant concept is robustly overshadowed by even slightly higher-magnitude ones in the learned features), and feature over-splitting (where binary features split into many smaller, less interpretable features). We hope that our framework will provide a useful step towards more objective and grounded evaluations of sparse dictionary learning methods.}, + archiveprefix = {arXiv}, + keywords = {Computer Science - Machine Learning}, + file = {/Users/hhakem/Zotero/storage/C2BNDBSV/Makelov et al. 
- 2024 - Towards Principled Evaluations of Sparse Autoencod.pdf;/Users/hhakem/Zotero/storage/HJEJPSBX/2405.html} +} + +@misc{makhzaniAdversarialAutoencoders2016, + title = {Adversarial {{Autoencoders}}}, + author = {Makhzani, Alireza and Shlens, Jonathon and Jaitly, Navdeep and Goodfellow, Ian and Frey, Brendan}, + year = {2016}, + month = may, + number = {arXiv:1511.05644}, + eprint = {1511.05644}, + primaryclass = {cs}, + publisher = {arXiv}, + urldate = {2024-07-09}, + abstract = {In this paper, we propose the ``adversarial autoencoder'' (AAE), which is a probabilistic autoencoder that uses the recently proposed generative adversarial networks (GAN) to perform variational inference by matching the aggregated posterior of the hidden code vector of the autoencoder with an arbitrary prior distribution. Matching the aggregated posterior to the prior ensures that generating from any part of prior space results in meaningful samples. As a result, the decoder of the adversarial autoencoder learns a deep generative model that maps the imposed prior to the data distribution. We show how the adversarial autoencoder can be used in applications such as semi-supervised classification, disentangling style and content of images, unsupervised clustering, dimensionality reduction and data visualization. We performed experiments on MNIST, Street View House Numbers and Toronto Face datasets and show that adversarial autoencoders achieve competitive results in generative modeling and semi-supervised classification tasks.}, + archiveprefix = {arXiv}, + langid = {english}, + keywords = {Computer Science - Machine Learning,UnRead}, + file = {/Users/hhakem/Zotero/storage/W2ZFU2ZP/Makhzani et al. - 2016 - Adversarial Autoencoders.pdf} +} + +@misc{mathieuDeepMultiscaleVideo2016a, + title = {Deep Multi-Scale Video Prediction beyond Mean Square Error}, + author = {Mathieu, Michael and Couprie, Camille and LeCun, Yann}, + year = {2016}, + month = feb, + number = {arXiv:1511.05440}, + eprint = {1511.05440}, + primaryclass = {cs, stat}, + publisher = {arXiv}, + urldate = {2024-07-08}, + abstract = {Learning to predict future images from a video sequence involves the construction of an internal representation that models the image evolution accurately, and therefore, to some degree, its content and dynamics. This is why pixel-space video prediction may be viewed as a promising avenue for unsupervised feature learning. In addition, while optical flow has been a very studied problem in computer vision for a long time, future frame prediction is rarely approached. Still, many vision applications could benefit from the knowledge of the next frames of videos, that does not require the complexity of tracking every pixel trajectory. In this work, we train a convolutional network to generate future frames given an input sequence. To deal with the inherently blurry predictions obtained from the standard Mean Squared Error (MSE) loss function, we propose three different and complementary feature learning strategies: a multi-scale architecture, an adversarial training method, and an image gradient difference loss function. We compare our predictions to different published results based on recurrent neural networks on the UCF101 dataset.}, + archiveprefix = {arXiv}, + langid = {english}, + keywords = {Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning,Statistics - Machine Learning,UnRead}, + file = {/Users/hhakem/Zotero/storage/U2JTWTPE/Mathieu et al. 
- 2016 - Deep multi-scale video prediction beyond mean squa.pdf} +} + +@misc{meschederWhichTrainingMethods2018, + title = {Which {{Training Methods}} for {{GANs}} Do Actually {{Converge}}?}, + author = {Mescheder, Lars and Geiger, Andreas and Nowozin, Sebastian}, + year = {2018}, + month = jul, + number = {arXiv:1801.04406}, + eprint = {1801.04406}, + primaryclass = {cs}, + publisher = {arXiv}, + urldate = {2024-10-03}, + abstract = {Recent work has shown local convergence of GAN training for absolutely continuous data and generator distributions. In this paper, we show that the requirement of absolute continuity is necessary: we describe a simple yet prototypical counterexample showing that in the more realistic case of distributions that are not absolutely continuous, unregularized GAN training is not always convergent. Furthermore, we discuss regularization strategies that were recently proposed to stabilize GAN training. Our analysis shows that GAN training with instance noise or zerocentered gradient penalties converges. On the other hand, we show that Wasserstein-GANs and WGAN-GP with a finite number of discriminator updates per generator update do not always converge to the equilibrium point. We discuss these results, leading us to a new explanation for the stability problems of GAN training. Based on our analysis, we extend our convergence results to more general GANs and prove local convergence for simplified gradient penalties even if the generator and data distributions lie on lower dimensional manifolds. We find these penalties to work well in practice and use them to learn highresolution generative image models for a variety of datasets with little hyperparameter tuning.}, + archiveprefix = {arXiv}, + langid = {english}, + keywords = {Computer Science - Artificial Intelligence,Computer Science - Computer Science and Game Theory,Computer Science - Machine Learning}, + file = {/Users/hhakem/Zotero/storage/BIPEFFDS/Mescheder et al. - 2018 - Which Training Methods for GANs do actually Conver.pdf} +} + +@misc{metosinWhyIsnFunctional2019, + title = {Why {{Isn}}'t {{Functional Programming}} the {{Norm}}? -- {{Richard Feldman}}}, + shorttitle = {Why {{Isn}}'t {{Functional Programming}} the {{Norm}}?}, + author = {{Metosin}}, + year = {2019}, + month = sep, + urldate = {2024-07-25}, + abstract = {Richard is a member of the Elm core team, the author of Elm in Action from Manning Publications, and the instructor for the Intro to Elm and Advanced Elm courses on Frontend Masters. He's been writing Elm since 2014, and is the maintainer of several open-source Elm packages including elm-test and elm-css packages.} +} + +@inproceedings{mikolovDistributedRepresentationsWords2013, + title = {Distributed {{Representations}} of {{Words}} and {{Phrases}} and Their {{Compositionality}}}, + booktitle = {Advances in {{Neural Information Processing Systems}}}, + author = {Mikolov, Tomas and Sutskever, Ilya and Chen, Kai and Corrado, Greg S and Dean, Jeff}, + year = {2013}, + volume = {26}, + publisher = {Curran Associates, Inc.}, + urldate = {2024-11-14}, + abstract = {The recently introduced continuous Skip-gram model is an efficient method for learning high-quality distributed vector representations that capture a large number of precise syntactic and semantic word relationships. In this paper we present several improvements that make the Skip-gram model more expressive and enable it to learn higher quality vectors more rapidly. 
We show that by subsampling frequent words we obtain significant speedup, and also learn higher quality representations as measured by our tasks. We also introduce Negative Sampling, a simplified variant of Noise Contrastive Estimation (NCE) that learns more accurate vectors for frequent words compared to the hierarchical softmax. An inherent limitation of word representations is their indifference to word order and their inability to represent idiomatic phrases. For example, the meanings of ``Canada'' and ``Air'' cannot be easily combined to obtain ``Air Canada''. Motivated by this example, we present a simple and efficient method for finding phrases, and show that their vector representations can be accurately learned by the Skip-gram model.}, + file = {/Users/hhakem/Zotero/storage/R2L6ZCF2/Mikolov et al. - 2013 - Distributed Representations of Words and Phrases a.pdf} +} + +@misc{millerExplanationArtificialIntelligence2018, + title = {Explanation in {{Artificial Intelligence}}: {{Insights}} from the {{Social Sciences}}}, + shorttitle = {Explanation in {{Artificial Intelligence}}}, + author = {Miller, Tim}, + year = {2018}, + month = aug, + number = {arXiv:1706.07269}, + eprint = {1706.07269}, + publisher = {arXiv}, + urldate = {2024-10-21}, + abstract = {There has been a recent resurgence in the area of explainable artificial intelligence as researchers and practitioners seek to make their algorithms more understandable. Much of this research is focused on explicitly explaining decisions or actions to a human observer, and it should not be controversial to say that looking at how humans explain to each other can serve as a useful starting point for explanation in artificial intelligence. However, it is fair to say that most work in explainable artificial intelligence uses only the researchers' intuition of what constitutes a `good' explanation. There exists vast and valuable bodies of research in philosophy, psychology, and cognitive science of how people define, generate, select, evaluate, and present explanations, which argues that people employ certain cognitive biases and social expectations towards the explanation process. This paper argues that the field of explainable artificial intelligence should build on this existing research, and reviews relevant papers from philosophy, cognitive psychology/science, and social psychology, which study these topics. It draws out some important findings, and discusses ways that these can be infused with work on explainable artificial intelligence.}, + archiveprefix = {arXiv}, + keywords = {Computer Science - Artificial Intelligence}, + file = {/Users/hhakem/Zotero/storage/4CNIGTBF/Miller - 2018 - Explanation in Artificial Intelligence Insights f.pdf;/Users/hhakem/Zotero/storage/K3SVYWFY/1706.html} +} + +@article{Minimax2024, + title = {Minimax}, + year = {2024}, + month = jun, + journal = {Wikipedia}, + urldate = {2024-07-08}, + abstract = {Minimax (sometimes Minmax, MM or saddle point) is a decision rule used in artificial intelligence, decision theory, game theory, statistics, and philosophy for minimizing the possible loss for a worst case (maximum loss) scenario. When dealing with gains, it is referred to as "maximin" -- to maximize the minimum gain. 
Originally formulated for several-player zero-sum game theory, covering both the cases where players take alternate moves and those where they make simultaneous moves, it has also been extended to more complex games and to general decision-making in the presence of uncertainty.}, + copyright = {Creative Commons Attribution-ShareAlike License}, + langid = {english}, + keywords = {Read}, + annotation = {Page Version ID: 1231892435}, + file = {/Users/hhakem/Zotero/storage/5U3K9CJK/Minimax.html} +} + +@book{molnarChapterInterpretabilityInterpretable, + title = {Chapter 3 {{Interpretability}} {\textbar} {{Interpretable Machine Learning}}}, + author = {Molnar, Christoph}, + urldate = {2024-10-21}, + abstract = {Machine learning algorithms usually operate as black boxes and it is unclear how they derived a certain decision. This book is a guide for practitioners to make machine learning decisions interpretable.}, + file = {/Users/hhakem/Zotero/storage/N4EZ9IKP/interpretability.html} +} + +@article{moorFoundationModelsGeneralist2023, + title = {Foundation Models for Generalist Medical Artificial Intelligence}, + author = {Moor, Michael and Banerjee, Oishi and Abad, Zahra Shakeri Hossein and Krumholz, Harlan M. and Leskovec, Jure and Topol, Eric J. and Rajpurkar, Pranav}, + year = {2023}, + month = apr, + journal = {Nature}, + volume = {616}, + number = {7956}, + pages = {259--265}, + publisher = {Nature Publishing Group}, + issn = {1476-4687}, + doi = {10.1038/s41586-023-05881-4}, + urldate = {2024-11-02}, + abstract = {The exceptionally rapid development of highly flexible, reusable artificial intelligence (AI) models is likely to usher in newfound capabilities in medicine. We propose a new paradigm for medical AI, which we refer to as generalist medical AI (GMAI). GMAI models will be capable of carrying out a diverse set of tasks using very little or no task-specific labelled data. Built through self-supervision on large, diverse datasets, GMAI will flexibly interpret different combinations of medical modalities, including data from imaging, electronic health records, laboratory results, genomics, graphs or medical text. Models will in turn produce expressive outputs such as free-text explanations, spoken recommendations or image annotations that demonstrate advanced medical reasoning abilities. Here we identify a set of high-impact potential applications for GMAI and lay out specific technical capabilities and training datasets necessary to enable them. We expect that GMAI-enabled applications will challenge current strategies for regulating and validating AI devices for medicine and will shift practices associated with the collection of large medical datasets.}, + copyright = {2023 Springer Nature Limited}, + langid = {english}, + keywords = {Computational biology and bioinformatics,Health care}, + file = {/Users/hhakem/Zotero/storage/8J3PN5F2/Moor et al. 
- 2023 - Foundation models for generalist medical artificia.pdf} +} + +@misc{moorMedFlamingoMultimodalMedical2023, + title = {Med-{{Flamingo}}: A {{Multimodal Medical Few-shot Learner}}}, + shorttitle = {Med-{{Flamingo}}}, + author = {Moor, Michael and Huang, Qian and Wu, Shirley and Yasunaga, Michihiro and Zakka, Cyril and Dalmia, Yash and Reis, Eduardo Pontes and Rajpurkar, Pranav and Leskovec, Jure}, + year = {2023}, + month = jul, + number = {arXiv:2307.15189}, + eprint = {2307.15189}, + publisher = {arXiv}, + urldate = {2024-11-03}, + abstract = {Medicine, by its nature, is a multifaceted domain that requires the synthesis of information across various modalities. Medical generative vision-language models (VLMs) make a first step in this direction and promise many exciting clinical applications. However, existing models typically have to be fine-tuned on sizeable down-stream datasets, which poses a significant limitation as in many medical applications data is scarce, necessitating models that are capable of learning from few examples in real-time. Here we propose Med-Flamingo, a multimodal few-shot learner adapted to the medical domain. Based on OpenFlamingo-9B, we continue pre-training on paired and interleaved medical image-text data from publications and textbooks. Med-Flamingo unlocks few-shot generative medical visual question answering (VQA) abilities, which we evaluate on several datasets including a novel challenging open-ended VQA dataset of visual USMLE-style problems. Furthermore, we conduct the first human evaluation for generative medical VQA where physicians review the problems and blinded generations in an interactive app. Med-Flamingo improves performance in generative medical VQA by up to 20{\textbackslash}\% in clinician's rating and firstly enables multimodal medical few-shot adaptations, such as rationale generation. We release our model, code, and evaluation app under https://github.com/snap-stanford/med-flamingo.}, + archiveprefix = {arXiv}, + keywords = {Computer Science - Artificial Intelligence,Computer Science - Computer Vision and Pattern Recognition}, + file = {/Users/hhakem/Zotero/storage/3RIB7WIW/Moor et al. - 2023 - Med-Flamingo a Multimodal Medical Few-shot Learne.pdf;/Users/hhakem/Zotero/storage/DSFPD58B/2307.html} +} + +@article{moshkovLearningRepresentationsImagebased2024, + title = {Learning Representations for Image-Based Profiling of Perturbations}, + author = {Moshkov, Nikita and Bornholdt, Michael and Benoit, Santiago and Smith, Matthew and McQuin, Claire and Goodman, Allen and Senft, Rebecca A. and Han, Yu and Babadi, Mehrtash and Horvath, Peter and Cimini, Beth A. and Carpenter, Anne E. and Singh, Shantanu and Caicedo, Juan C.}, + year = {2024}, + month = feb, + journal = {Nature Communications}, + volume = {15}, + number = {1}, + pages = {1594}, + publisher = {Nature Publishing Group}, + issn = {2041-1723}, + doi = {10.1038/s41467-024-45999-1}, + urldate = {2024-10-19}, + abstract = {Measuring the phenotypic effect of treatments on cells through imaging assays is an efficient and powerful way of studying cell biology, and requires computational methods for transforming images into quantitative data. Here, we present an improved strategy for learning representations of treatment effects from high-throughput imaging, following a causal interpretation. We use weakly supervised learning for modeling associations between images and treatments, and show that it encodes both confounding factors and phenotypic features in the learned representation. 
To facilitate their separation, we constructed a large training dataset with images from five different studies to maximize experimental diversity, following insights from our causal analysis. Training a model with this dataset successfully improves downstream performance, and produces a reusable convolutional network for image-based profiling, which we call Cell Painting CNN. We evaluated our strategy on three publicly available Cell Painting datasets, and observed that the Cell Painting CNN improves performance in downstream analysis up to 30\% with respect to classical features, while also being more computationally efficient.}, + copyright = {2024 The Author(s)}, + langid = {english}, + keywords = {Image processing,Machine learning,Phenotypic screening}, + file = {/Users/hhakem/Zotero/storage/JWYZ9ZEP/Moshkov et al. - 2024 - Learning representations for image-based profiling.pdf} +} + +@article{moshkovLearningRepresentationsImagebased2024a, + title = {Learning Representations for Image-Based Profiling of Perturbations}, + author = {Moshkov, Nikita and Bornholdt, Michael and Benoit, Santiago and Smith, Matthew and McQuin, Claire and Goodman, Allen and Senft, Rebecca A. and Han, Yu and Babadi, Mehrtash and Horvath, Peter and Cimini, Beth A. and Carpenter, Anne E. and Singh, Shantanu and Caicedo, Juan C.}, + year = {2024}, + month = feb, + journal = {Nature Communications}, + volume = {15}, + number = {1}, + pages = {1594}, + publisher = {Nature Publishing Group}, + issn = {2041-1723}, + doi = {10.1038/s41467-024-45999-1}, + urldate = {2024-10-19}, + abstract = {Measuring the phenotypic effect of treatments on cells through imaging assays is an efficient and powerful way of studying cell biology, and requires computational methods for transforming images into quantitative data. Here, we present an improved strategy for learning representations of treatment effects from high-throughput imaging, following a causal interpretation. We use weakly supervised learning for modeling associations between images and treatments, and show that it encodes both confounding factors and phenotypic features in the learned representation. To facilitate their separation, we constructed a large training dataset with images from five different studies to maximize experimental diversity, following insights from our causal analysis. Training a model with this dataset successfully improves downstream performance, and produces a reusable convolutional network for image-based profiling, which we call Cell Painting CNN. We evaluated our strategy on three publicly available Cell Painting datasets, and observed that the Cell Painting CNN improves performance in downstream analysis up to 30\% with respect to classical features, while also being more computationally efficient.}, + copyright = {2024 The Author(s)}, + langid = {english}, + keywords = {Image processing,Machine learning,Phenotypic screening}, + file = {/Users/hhakem/Zotero/storage/F6CBB23V/Moshkov et al. - 2024 - Learning representations for image-based profiling.pdf} +} + +@article{murdochDefinitionsMethodsApplications2019, + title = {Definitions, Methods, and Applications in Interpretable Machine Learning}, + author = {Murdoch, W. 
James and Singh, Chandan and Kumbier, Karl and {Abbasi-Asl}, Reza and Yu, Bin}, + year = {2019}, + month = oct, + journal = {Proceedings of the National Academy of Sciences of the United States of America}, + volume = {116}, + number = {44}, + pages = {22071}, + doi = {10.1073/pnas.1900654116}, + urldate = {2024-10-21}, + abstract = {The recent surge in interpretability research has led to confusion on numerous fronts. In particular, it is unclear what it means to be interpretable and how to select, evaluate, or even discuss methods for producing interpretations of ...}, + langid = {english}, + pmid = {31619572}, + file = {/Users/hhakem/Zotero/storage/BBPSHV4J/Murdoch et al. - 2019 - Definitions, methods, and applications in interpre.pdf} +} + +@misc{Naive_classiAuto4JupyterLab, + title = {Naive\_classi{\dots} (Auto-4 : 7) - {{JupyterLab}}}, + urldate = {2024-07-24}, + howpublished = {http://localhost:8888/lab/workspaces/auto-4/tree/workspace/analysis/naive\_classifier\_profiles.ipynb}, + file = {/Users/hhakem/Zotero/storage/WIVMBTN2/naive_classifier_profiles.html} +} + +@misc{Naive_classiJupyterLab, + title = {Naive\_classi{\dots} (3) - {{JupyterLab}}}, + urldate = {2024-08-07}, + howpublished = {http://localhost:8889/lab/tree/workspace/analysis/naive\_classifier\_profiles.ipynb}, + file = {/Users/hhakem/Zotero/storage/G8TLYNIP/naive_classifier_profiles.html} +} + +@misc{nanowellNanowellAdEMAMixOptimizerPytorch2024, + title = {Nanowell/{{AdEMAMix-Optimizer-Pytorch}}}, + author = {{nanowell}}, + year = {2024}, + month = sep, + urldate = {2024-09-13}, + abstract = {The AdEMAMix Optimizer: Better, Faster, Older.}, + copyright = {MIT}, + keywords = {ademamix,artificial-intelligence,deep-neural-networks,machine-learning,multimodal,optimizer,pytorch} +} + +@misc{NeuralNetworksZero, + title = {Neural {{Networks}}: {{Zero To Hero}}}, + urldate = {2024-07-03}, + howpublished = {https://karpathy.ai/zero-to-hero.html}, + keywords = {UnRead}, + file = {/Users/hhakem/Zotero/storage/VSVQHFQR/zero-to-hero.html} +} + +@misc{nilforoshanZeroshotCausalLearning2024, + title = {Zero-Shot Causal Learning}, + author = {Nilforoshan, Hamed and Moor, Michael and Roohani, Yusuf and Chen, Yining and {\v S}urina, Anja and Yasunaga, Michihiro and Oblak, Sara and Leskovec, Jure}, + year = {2024}, + month = feb, + number = {arXiv:2301.12292}, + eprint = {2301.12292}, + publisher = {arXiv}, + urldate = {2024-11-02}, + abstract = {Predicting how different interventions will causally affect a specific individual is important in a variety of domains such as personalized medicine, public policy, and online marketing. There are a large number of methods to predict the effect of an existing intervention based on historical data from individuals who received it. However, in many settings it is important to predict the effects of novel interventions (e.g., a newly invented drug), which these methods do not address. Here, we consider zero-shot causal learning: predicting the personalized effects of a novel intervention. We propose CaML, a causal meta-learning framework which formulates the personalized prediction of each intervention's effect as a task. CaML trains a single meta-model across thousands of tasks, each constructed by sampling an intervention, its recipients, and its nonrecipients. 
By leveraging both intervention information (e.g., a drug's attributes) and individual features{\textasciitilde}(e.g., a patient's history), CaML is able to predict the personalized effects of novel interventions that do not exist at the time of training. Experimental results on real world datasets in large-scale medical claims and cell-line perturbations demonstrate the effectiveness of our approach. Most strikingly, {\textbackslash}method's zero-shot predictions outperform even strong baselines trained directly on data from the test interventions.}, + archiveprefix = {arXiv}, + keywords = {Computer Science - Artificial Intelligence,Computer Science - Computers and Society,Computer Science - Human-Computer Interaction,Computer Science - Machine Learning}, + file = {/Users/hhakem/Zotero/storage/I9QD4WQL/Nilforoshan et al. - 2024 - Zero-shot causal learning.pdf;/Users/hhakem/Zotero/storage/46B23EIK/2301.html} +} + +@misc{nilforoshanZeroshotCausalLearning2024a, + title = {Zero-Shot Causal Learning}, + author = {Nilforoshan, Hamed and Moor, Michael and Roohani, Yusuf and Chen, Yining and {\v S}urina, Anja and Yasunaga, Michihiro and Oblak, Sara and Leskovec, Jure}, + year = {2024}, + month = feb, + number = {arXiv:2301.12292}, + eprint = {2301.12292}, + publisher = {arXiv}, + urldate = {2024-11-02}, + abstract = {Predicting how different interventions will causally affect a specific individual is important in a variety of domains such as personalized medicine, public policy, and online marketing. There are a large number of methods to predict the effect of an existing intervention based on historical data from individuals who received it. However, in many settings it is important to predict the effects of novel interventions (e.g., a newly invented drug), which these methods do not address. Here, we consider zero-shot causal learning: predicting the personalized effects of a novel intervention. We propose CaML, a causal meta-learning framework which formulates the personalized prediction of each intervention's effect as a task. CaML trains a single meta-model across thousands of tasks, each constructed by sampling an intervention, its recipients, and its nonrecipients. By leveraging both intervention information (e.g., a drug's attributes) and individual features{\textasciitilde}(e.g., a patient's history), CaML is able to predict the personalized effects of novel interventions that do not exist at the time of training. Experimental results on real world datasets in large-scale medical claims and cell-line perturbations demonstrate the effectiveness of our approach. Most strikingly, {\textbackslash}method's zero-shot predictions outperform even strong baselines trained directly on data from the test interventions.}, + archiveprefix = {arXiv}, + keywords = {Computer Science - Artificial Intelligence,Computer Science - Computers and Society,Computer Science - Human-Computer Interaction,Computer Science - Machine Learning}, + file = {/Users/hhakem/Zotero/storage/WPPBJNYB/Nilforoshan et al. 
- 2024 - Zero-shot causal learning.pdf;/Users/hhakem/Zotero/storage/TD7KRQVN/2301.html} +} + +@misc{NovoDesignProtein, + title = {De Novo Design of Protein Structure and Function with {{RFdiffusion}} {\textbar} {{Nature}}}, + urldate = {2024-11-05}, + howpublished = {https://www.nature.com/articles/s41586-023-06415-8}, + file = {/Users/hhakem/Zotero/storage/YISNK6B2/s41586-023-06415-8.html} +} + +@misc{NucleotideTransformerBuilding, + title = {The {{Nucleotide Transformer}}: {{Building}} and {{Evaluating Robust Foundation Models}} for {{Human Genomics}} {\textbar} {{bioRxiv}}}, + urldate = {2024-10-17}, + howpublished = {https://www.biorxiv.org/content/10.1101/2023.01.11.523679v4.full}, + file = {/Users/hhakem/Zotero/storage/P2EF6IIG/2023.01.11.523679v4.html} +} + +@article{olahZoomIntroductionCircuits2020, + title = {Zoom {{In}}: {{An Introduction}} to {{Circuits}}}, + shorttitle = {Zoom {{In}}}, + author = {Olah, Chris and Cammarata, Nick and Schubert, Ludwig and Goh, Gabriel and Petrov, Michael and Carter, Shan}, + year = {2020}, + month = mar, + journal = {Distill}, + volume = {5}, + number = {3}, + pages = {10.23915/distill.00024.001}, + issn = {2476-0757}, + doi = {10.23915/distill.00024.001}, + urldate = {2024-11-12} +} + +@article{outeiralCodonLanguageEmbeddings2024, + title = {Codon Language Embeddings Provide Strong Signals for Use in Protein Engineering}, + author = {Outeiral, Carlos and Deane, Charlotte M.}, + year = {2024}, + month = feb, + journal = {Nature Machine Intelligence}, + volume = {6}, + number = {2}, + pages = {170--179}, + publisher = {Nature Publishing Group}, + issn = {2522-5839}, + doi = {10.1038/s42256-024-00791-0}, + urldate = {2024-10-29}, + abstract = {Protein representations from deep language models have yielded state-of-the-art performance across many tasks in computational protein engineering. In recent years, progress has primarily focused on parameter count, with recent models' capacities surpassing the size of the very datasets they were trained on. Here we propose an alternative direction. We show that large language models trained on codons, instead of amino acid sequences, provide high-quality representations that outperform comparable state-of-the-art models across a variety of tasks. In some tasks, such as species recognition, prediction of protein and transcript abundance or melting point estimation, we show that a language model trained on codons outperforms every other published protein language model, including some that contain over 50 times more parameters. These results indicate that, in addition to commonly studied scale and model complexity, the information content of biological data provides an orthogonal direction to improve the power of machine learning in biology.}, + copyright = {2024 The Author(s)}, + langid = {english}, + keywords = {Genomics,Protein folding,Protein function predictions}, + file = {/Users/hhakem/Zotero/storage/YNNGABGM/Outeiral and Deane - 2024 - Codon language embeddings provide strong signals f.pdf} +} + +@misc{pagliardiniAdEMAMixOptimizerBetter2024, + title = {The {{AdEMAMix Optimizer}}: {{Better}}, {{Faster}}, {{Older}}}, + shorttitle = {The {{AdEMAMix Optimizer}}}, + author = {Pagliardini, Matteo and Ablin, Pierre and Grangier, David}, + year = {2024}, + month = sep, + number = {arXiv:2409.03137}, + eprint = {2409.03137}, + primaryclass = {cs, stat}, + publisher = {arXiv}, + urldate = {2024-09-13}, + abstract = {Momentum based optimizers are central to a wide range of machine learning applications. 
These typically rely on an Exponential Moving Average (EMA) of gradients, which decays exponentially the present contribution of older gradients. This accounts for gradients being local linear approximations which lose their relevance as the iterate moves along the loss landscape. This work questions the use of a single EMA to accumulate past gradients and empirically demonstrates how this choice can be sub-optimal: a single EMA cannot simultaneously give a high weight to the immediate past, and a non-negligible weight to older gradients. Building on this observation, we propose AdEMAMix, a simple modification of the Adam optimizer with a mixture of two EMAs to better take advantage of past gradients. Our experiments on language modeling and image classification show---quite surprisingly---that gradients can stay relevant for tens of thousands of steps. They help to converge faster, and often to lower minima: e.g., a 1.3B parameter AdEMAMix LLM trained on 101B tokens performs comparably to an AdamW model trained on 197B tokens (+95\%). Moreover, our method significantly slowsdown model forgetting during training. Our work motivates further exploration of different types of functions to leverage past gradients, beyond EMAs.}, + archiveprefix = {arXiv}, + langid = {english}, + keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}, + file = {/Users/hhakem/Zotero/storage/ZDL9PESI/Pagliardini et al. - 2024 - The AdEMAMix Optimizer Better, Faster, Older.pdf} +} + +@misc{pagliardiniAdEMAMixOptimizerBetter2024a, + title = {The {{AdEMAMix Optimizer}}: {{Better}}, {{Faster}}, {{Older}}}, + shorttitle = {The {{AdEMAMix Optimizer}}}, + author = {Pagliardini, Matteo and Ablin, Pierre and Grangier, David}, + year = {2024}, + month = sep, + number = {arXiv:2409.03137}, + eprint = {2409.03137}, + primaryclass = {cs, stat}, + publisher = {arXiv}, + doi = {10.48550/arXiv.2409.03137}, + urldate = {2024-09-13}, + abstract = {Momentum based optimizers are central to a wide range of machine learning applications. These typically rely on an Exponential Moving Average (EMA) of gradients, which decays exponentially the present contribution of older gradients. This accounts for gradients being local linear approximations which lose their relevance as the iterate moves along the loss landscape. This work questions the use of a single EMA to accumulate past gradients and empirically demonstrates how this choice can be sub-optimal: a single EMA cannot simultaneously give a high weight to the immediate past, and a non-negligible weight to older gradients. Building on this observation, we propose AdEMAMix, a simple modification of the Adam optimizer with a mixture of two EMAs to better take advantage of past gradients. Our experiments on language modeling and image classification show -- quite surprisingly -- that gradients can stay relevant for tens of thousands of steps. They help to converge faster, and often to lower minima: e.g., a \$1.3\$B parameter AdEMAMix LLM trained on \$101\$B tokens performs comparably to an AdamW model trained on \$197\$B tokens (\$+95{\textbackslash}\%\$). Moreover, our method significantly slows-down model forgetting during training. Our work motivates further exploration of different types of functions to leverage past gradients, beyond EMAs.}, + archiveprefix = {arXiv}, + keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}, + file = {/Users/hhakem/Zotero/storage/5SMB24HD/Pagliardini et al. 
- 2024 - The AdEMAMix Optimizer Better, Faster, Older.pdf;/Users/hhakem/Zotero/storage/2UPKQ4AD/2409.html} +} + +@misc{palmaPredictingCellMorphological2023, + title = {Predicting Cell Morphological Responses to Perturbations Using Generative Modeling}, + author = {Palma, Alessandro and Theis, Fabian J. and Lotfollahi, Mohammad}, + year = {2023}, + month = jul, + primaryclass = {New Results}, + pages = {2023.07.17.549216}, + publisher = {bioRxiv}, + doi = {10.1101/2023.07.17.549216}, + urldate = {2024-07-18}, + abstract = {Advancements in high-throughput screening have enabled the exploration of rich phenotypic readouts like high-content microscopy, expediting drug target identification and mode of action studies. However, scaling these experiments to the vast space of drug or genetic manipulations poses challenges, as only a small subset of compounds show activity in screenings. Despite being widely used in various applications, machine learning methods have not shown a reliable ability to extrapolate predictions to scenarios involving unseen phenomena, specifically transforming an unseen control cell image into a desired perturbation. We present a generative model, the IMage Perturbation Autoencoder (IMPA), which predicts cellular morphological effects of chemical and genetic perturbations using untreated cells as input. IMPA learns perturbation-specific styles from generalized embeddings and generates counterfactual treatment response predictions in control cells. We demonstrate IMPA can predict morphological changes caused by small molecule perturbations on breast cancer cells. Additionally, we test IMPA on the unseen drug effect prediction task, showing improved performance over state-of-the-art generative models when compounds are structurally related to the training set. Finally, generalizability and capability to predict more subtle effects are showcased through its application to large microscopy datasets with hundreds of genetic perturbations on U2OS cells. We envision IMPA to become a valuable tool in computational microscopy for aiding phenotypic drug discovery, facilitating navigation of the perturbation space, and rational experimental design.}, + archiveprefix = {bioRxiv}, + chapter = {New Results}, + copyright = {{\copyright} 2023, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution-NonCommercial-NoDerivs 4.0 International), CC BY-NC-ND 4.0, as described at http://creativecommons.org/licenses/by-nc-nd/4.0/}, + langid = {english}, + keywords = {UnRead}, + file = {/Users/hhakem/Zotero/storage/QJZB2XZB/Palma et al. - 2023 - Predicting cell morphological responses to perturb.pdf} +} + +@misc{phdFeatureAttributionExplainable2022, + title = {Feature {{Attribution}} in {{Explainable AI}}}, + author = {PhD, Gatha Varma}, + year = {2022}, + month = apr, + journal = {Geek Culture}, + urldate = {2024-07-03}, + abstract = {Model explanations in the form of feature importance. 
What is it \& how is it achieved?}, + langid = {english}, + keywords = {Feature-attribution,Read}, + file = {/Users/hhakem/Zotero/storage/Y2487GX7/feature-attribution-in-explainable-ai-626f0a1d95e2.html} +} + +@misc{PhenDiffRevealingInvisible, + title = {{{PhenDiff}}: {{Revealing Invisible Phenotypes}} with {{Conditional Diffusion Models}}}, + urldate = {2024-08-13}, + howpublished = {https://arxiv.org/html/2312.08290v1/\#S6}, + file = {/Users/hhakem/Zotero/storage/U5592AHK/2312.08290v1.html} +} + +@misc{Piximi, + title = {Piximi}, + urldate = {2024-12-06}, + howpublished = {http://localhost:3000/}, + file = {/Users/hhakem/Zotero/storage/ATSJGYGD/localhost.html} +} + +@misc{PracticalCommonLisp, + title = {Practical {{Common Lisp}}}, + urldate = {2024-08-20}, + howpublished = {https://gigamonkeys.com/book/}, + file = {/Users/hhakem/Zotero/storage/3MLS4IAY/book.html} +} + +@misc{PredictingMultipleConformations, + title = {Predicting Multiple Conformations via Sequence Clustering and {{AlphaFold2}} {\textbar} {{Nature}}}, + urldate = {2024-11-05}, + howpublished = {https://www.nature.com/articles/s41586-023-06832-9}, + file = {/Users/hhakem/Zotero/storage/UVPFTMVQ/s41586-023-06832-9.html} +} + +@misc{ProteinConformationalSwitches, + title = {Protein {{Conformational Switches}}: {{From Nature}} to {{Design}} - {{PMC}}}, + urldate = {2024-11-05}, + howpublished = {https://pmc.ncbi.nlm.nih.gov/articles/PMC3404493/}, + file = {/Users/hhakem/Zotero/storage/A3WDAK8W/PMC3404493.html} +} + +@misc{PythonSuperConsidered2011, + title = {Python's Super() Considered Super!}, + year = {2011}, + month = may, + journal = {Deep Thoughts by Raymond Hettinger}, + urldate = {2024-08-15}, + abstract = {If you aren't wowed by Python's super() builtin, chances are you don't really know what it is capable of doing or how to use it effectively. Much has been written about super() and much of that wri{\dots}}, + langid = {english}, + file = {/Users/hhakem/Zotero/storage/HVV4L2EG/super-considered-super.html} +} + +@article{rahimiDOTFlexibleMultiobjective2024, + title = {{{DOT}}: A Flexible Multi-Objective Optimization Framework for Transferring Features across Single-Cell and Spatial Omics}, + shorttitle = {{{DOT}}}, + author = {Rahimi, Arezou and {Vale-Silva}, Luis A and F{\"a}lth Savitski, Maria and Tanevski, Jovan and {Saez-Rodriguez}, Julio}, + year = {2024}, + month = jun, + journal = {Nature communications}, + volume = {15}, + number = {1}, + pages = {4994}, + issn = {2041-1723}, + doi = {10.1038/s41467-024-48868-z}, + urldate = {2024-11-04}, + abstract = {Single-cell transcriptomics and spatially-resolved imaging/sequencing technologies have revolutionized biomedical research. However, they suffer from lack of spatial information and a trade-off of resolution and gene coverage, respectively. We propose DOT, a multi-objective optimization framework for transferring cellular features across these data modalities, thus integrating their complementary information. DOT uses genes beyond those common to the data modalities, exploits the local spatial context, transfers spatial features beyond cell-type information, and infers absolute/relative abundance of cell populations at tissue locations. Thus, DOT bridges single-cell transcriptomics data with both high- and low-resolution spatially-resolved data. Moreover, DOT combines practical aspects related to cell composition, heterogeneity, technical effects, and integration of prior knowledge. 
Our fast implementation based on the Frank-Wolfe algorithm achieves state-of-the-art or improved performance in localizing cell features in high- and low-resolution spatial data and estimating the expression of unmeasured genes in low-coverage spatial data.}, + copyright = {cc by}, + langid = {english}, + pmid = {38862466}, + file = {/Users/hhakem/Zotero/storage/F23BNEC5/Rahimi et al. - 2024 - DOT a flexible multi-objective optimization frame.pdf} +} + +@misc{ramosReviewLargeLanguage2024a, + title = {A {{Review}} of {{Large Language Models}} and {{Autonomous Agents}} in {{Chemistry}}}, + author = {Ramos, Mayk Caldas and Collison, Christopher J. and White, Andrew D.}, + year = {2024}, + month = jun, + number = {arXiv:2407.01603}, + eprint = {2407.01603}, + primaryclass = {physics}, + publisher = {arXiv}, + urldate = {2024-07-08}, + abstract = {Large language models (LLMs) are emerging as a powerful tool in chemistry across multiple domains. In chemistry, LLMs are able to accurately predict properties, design new molecules, optimize synthesis pathways, and accelerate drug and material discovery. A core emerging idea is combining LLMs with chemistry-specific tools like synthesis planners and databases, leading to so-called ``agents.'' This review covers LLMs' recent history, current capabilities, design, challenges specific to chemistry, and future directions. Particular attention is given to agents and their emergence as a cross-chemistry paradigm. Agents have proven effective in diverse domains of chemistry, but challenges remain. It is unclear if creating domain-specific versus generalist agents and developing autonomous pipelines versus "co-pilot" systems will accelerate chemistry. An emerging direction is the development of multi-agent systems using a human-in-the-loop approach. Due to the incredibly fast development of this field, a repository has been built to keep track of the latest studies: https: //github.com/ur-whitelab/LLMs-in-science.}, + archiveprefix = {arXiv}, + langid = {english}, + keywords = {Computer Science - Artificial Intelligence,Computer Science - Computation and Language,Computer Science - Machine Learning,Physics - Chemical Physics,UnRead}, + file = {/Users/hhakem/Zotero/storage/V36VQ4JA/Ramos et al. - 2024 - A Review of Large Language Models and Autonomous A.pdf} +} + +@misc{ribeiroWhyShouldTrust2016, + title = {"{{Why Should I Trust You}}?": {{Explaining}} the {{Predictions}} of {{Any Classifier}}}, + shorttitle = {"{{Why Should I Trust You}}?}, + author = {Ribeiro, Marco Tulio and Singh, Sameer and Guestrin, Carlos}, + year = {2016}, + month = aug, + number = {arXiv:1602.04938}, + eprint = {1602.04938}, + publisher = {arXiv}, + urldate = {2024-10-21}, + abstract = {Despite widespread adoption, machine learning models remain mostly black boxes. Understanding the reasons behind predictions is, however, quite important in assessing trust, which is fundamental if one plans to take action based on a prediction, or when choosing whether to deploy a new model. Such understanding also provides insights into the model, which can be used to transform an untrustworthy model or prediction into a trustworthy one. In this work, we propose LIME, a novel explanation technique that explains the predictions of any classifier in an interpretable and faithful manner, by learning an interpretable model locally around the prediction. 
We also propose a method to explain models by presenting representative individual predictions and their explanations in a non-redundant way, framing the task as a submodular optimization problem. We demonstrate the flexibility of these methods by explaining different models for text (e.g. random forests) and image classification (e.g. neural networks). We show the utility of explanations via novel experiments, both simulated and with human subjects, on various scenarios that require trust: deciding if one should trust a prediction, choosing between models, improving an untrustworthy classifier, and identifying why a classifier should not be trusted.}, + archiveprefix = {arXiv}, + keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning,Statistics - Machine Learning}, + file = {/Users/hhakem/Zotero/storage/CU2EBS5E/Ribeiro et al. - 2016 - Why Should I Trust You Explaining the Predicti.pdf;/Users/hhakem/Zotero/storage/EP2TSP9V/1602.html} +} + +@misc{richardChatNTMultimodalConversational2024, + title = {{{ChatNT}}: {{A Multimodal Conversational Agent}} for {{DNA}}, {{RNA}} and {{Protein Tasks}}}, + shorttitle = {{{ChatNT}}}, + author = {Richard, Guillaume and de Almeida, Bernardo P. and {Dalla-Torre}, Hugo and Blum, Christopher and Hexemer, Lorenz and Pandey, Priyanka and Laurent, Stefan and Lopez, Marie and Laterre, Alexandre and Lang, Maren and {\c S}ahin, U{\u g}ur and Beguir, Karim and Pierrot, Thomas}, + year = {2024}, + month = sep, + primaryclass = {New Results}, + pages = {2024.04.30.591835}, + publisher = {bioRxiv}, + doi = {10.1101/2024.04.30.591835}, + urldate = {2024-10-17}, + abstract = {Language models are thriving, powering conversational agents that assist and empower humans to solve a number of tasks. Recently, these models were extended to support additional modalities including vision, audio and video, demonstrating impressive capabilities across multiple domains including healthcare. Still, conversational agents remain limited in biology as they cannot yet fully comprehend biological sequences. On the other hand, high-performance foundation models for biological sequences have been built through self-supervision over sequencing data, but these need to be fine-tuned for each specific application, preventing transfer and generalization between tasks. In addition, these models are not conversational which limits their utility to users with coding capabilities. In this paper, we propose to bridge the gap between biology foundation models and conversational agents by introducing ChatNT, the first multimodal conversational agent with an advanced understanding of biological sequences. ChatNT achieves new state-of-the-art results on the Nucleotide Transformer benchmark while being able to solve all tasks at once, in English, and to generalize to unseen questions. In addition, we have curated a new set of more biologically relevant instructions tasks from DNA, RNA and proteins, spanning multiple species, tissues and biological processes. ChatNT reaches performance on par with state-of-the-art specialized methods on those tasks. We also present a novel perplexity-based technique to help calibrate the confidence of our model predictions. Our framework for genomics instruction-tuning can be easily extended to more tasks and biological data modalities (e.g. structure, imaging), making it a widely applicable tool for biology. 
ChatNT is the first model of its kind and constitutes an initial step towards building generally capable agents that understand biology from first principles while being accessible to users with no coding background.}, + archiveprefix = {bioRxiv}, + chapter = {New Results}, + copyright = {{\copyright} 2024, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution-NonCommercial-NoDerivs 4.0 International), CC BY-NC-ND 4.0, as described at http://creativecommons.org/licenses/by-nc-nd/4.0/}, + langid = {english}, + file = {/Users/hhakem/Zotero/storage/DBIRF2EV/Richard et al. - 2024 - ChatNT A Multimodal Conversational Agent for DNA,.pdf} +} + +@inproceedings{richardsonEncodingStyleStyleGAN2021, + title = {Encoding in {{Style}}: A {{StyleGAN Encoder}} for {{Image-to-Image Translation}}}, + shorttitle = {Encoding in {{Style}}}, + booktitle = {2021 {{IEEE}}/{{CVF Conference}} on {{Computer Vision}} and {{Pattern Recognition}} ({{CVPR}})}, + author = {Richardson, Elad and Alaluf, Yuval and Patashnik, Or and Nitzan, Yotam and Azar, Yaniv and Shapiro, Stav and {Cohen-Or}, Daniel}, + year = {2021}, + month = jun, + pages = {2287--2296}, + issn = {2575-7075}, + doi = {10.1109/CVPR46437.2021.00232}, + urldate = {2024-07-10}, + abstract = {We present a generic image-to-image translation framework, pixel2style2pixel (pSp). Our pSp framework is based on a novel encoder network that directly generates a series of style vectors which are fed into a pretrained StyleGAN generator, forming the extended {\textbackslash}mathcalW + latent space. We first show that our encoder can directly embed real images into {\textbackslash}mathcalW + , with no additional optimization. Next, we propose utilizing our encoder to directly solve image-to-image translation tasks, defining them as encoding problems from some input domain into the latent domain. By deviating from the standard "invert first, edit later" methodology used with previous StyleGAN encoders, our approach can handle a variety of tasks even when the input image is not represented in the StyleGAN domain. We show that solving translation tasks through StyleGAN significantly simplifies the training process, as no adversary is required, has better support for solving tasks without pixel-to-pixel correspondence, and inherently supports multi-modal synthesis via the resampling of styles. Finally, we demonstrate the potential of our framework on a variety of facial image-to-image translation tasks, even when compared to state-of-the-art solutions designed specifically for a single task, and further show that it can be extended beyond the human facial domain. Code is available at https://github.com/eladrich/pixel2style2pixel.}, + keywords = {Computer architecture,Computer vision,Decoding,Generators,Image coding,Pattern recognition,Training,UnRead}, + file = {/Users/hhakem/Zotero/storage/IEFQIG8W/Richardson et al. - 2021 - Encoding in Style a StyleGAN Encoder for Image-to.pdf;/Users/hhakem/Zotero/storage/FLLDVGYE/9578137.html} +} + +@inproceedings{rossRightRightReasons2017, + title = {Right for the {{Right Reasons}}: {{Training Differentiable Models}} by {{Constraining}} Their {{Explanations}}}, + shorttitle = {Right for the {{Right Reasons}}}, + booktitle = {Proceedings of the {{Twenty-Sixth International Joint Conference}} on {{Artificial Intelligence}}}, + author = {Ross, Andrew Slavin and Hughes, Michael C. 
and {Doshi-Velez}, Finale}, + year = {2017}, + month = aug, + pages = {2662--2670}, + publisher = {International Joint Conferences on Artificial Intelligence Organization}, + address = {Melbourne, Australia}, + doi = {10.24963/ijcai.2017/371}, + urldate = {2024-10-21}, + abstract = {Neural networks are among the most accurate supervised learning methods in use today. However, their opacity makes them difficult to trust in critical applications, especially if conditions in training may differ from those in test. Recent work on explanations for black-box models has produced tools (e.g. LIME) to show the implicit rules behind predictions. These tools can help us identify when models are right for the wrong reasons. However, these methods do not scale to explaining entire datasets and cannot correct the problems they reveal. We introduce a method for efficiently explaining and regularizing differentiable models by examining and selectively penalizing their input gradients. We apply these penalties both based on expert annotation and in an unsupervised fashion that produces multiple classifiers with qualitatively different decision boundaries. On multiple datasets, we show our approach generates faithful explanations and models that generalize much better when conditions differ between training and test.}, + isbn = {978-0-9992411-0-3}, + langid = {english}, + file = {/Users/hhakem/Zotero/storage/PSZZUAYY/Ross et al. - 2017 - Right for the Right Reasons Training Differentiab.pdf} +} + +@article{rotemVisualInterpretabilityBioimaging2024, + title = {Visual Interpretability of Bioimaging Deep Learning Models}, + author = {Rotem, Oded and Zaritsky, Assaf}, + year = {2024}, + month = aug, + journal = {Nature Methods}, + volume = {21}, + number = {8}, + pages = {1394--1397}, + publisher = {Nature Publishing Group}, + issn = {1548-7105}, + doi = {10.1038/s41592-024-02322-6}, + urldate = {2024-09-16}, + abstract = {The success of deep learning in analyzing bioimages comes at the expense of biologically meaningful interpretations. We review the state of the art of explainable artificial intelligence (XAI) in bioimaging and discuss its potential in hypothesis generation and data-driven discovery.}, + copyright = {2024 Springer Nature America, Inc.}, + langid = {english}, + keywords = {Image processing,Machine learning}, + file = {/Users/hhakem/Zotero/storage/MNKCF55E/Rotem and Zaritsky - 2024 - Visual interpretability of bioimaging deep learnin.pdf} +} + +@article{rotemVisualInterpretabilityBioimaging2024a, + title = {Visual Interpretability of Bioimaging Deep Learning Models}, + author = {Rotem, Oded and Zaritsky, Assaf}, + year = {2024}, + month = aug, + journal = {Nature Methods}, + volume = {21}, + number = {8}, + pages = {1394--1397}, + publisher = {Nature Publishing Group}, + issn = {1548-7105}, + doi = {10.1038/s41592-024-02322-6}, + urldate = {2024-10-25}, + abstract = {The success of deep learning in analyzing bioimages comes at the expense of biologically meaningful interpretations. 
We review the state of the art of explainable artificial intelligence (XAI) in bioimaging and discuss its potential in hypothesis generation and data-driven discovery.}, + copyright = {2024 Springer Nature America, Inc.}, + langid = {english}, + keywords = {Image processing,Machine learning}, + file = {/Users/hhakem/Zotero/storage/FWGQVJ9F/Rotem and Zaritsky - 2024 - Visual interpretability of bioimaging deep learnin.pdf} +} + +@misc{salimansImprovedTechniquesTraining2016, + title = {Improved {{Techniques}} for {{Training GANs}}}, + author = {Salimans, Tim and Goodfellow, Ian and Zaremba, Wojciech and Cheung, Vicki and Radford, Alec and Chen, Xi}, + year = {2016}, + month = jun, + number = {arXiv:1606.03498}, + eprint = {1606.03498}, + primaryclass = {cs}, + publisher = {arXiv}, + urldate = {2024-07-09}, + abstract = {We present a variety of new architectural features and training procedures that we apply to the generative adversarial networks (GANs) framework. We focus on two applications of GANs: semi-supervised learning, and the generation of images that humans find visually realistic. Unlike most work on generative models, our primary goal is not to train a model that assigns high likelihood to test data, nor do we require the model to be able to learn well without using any labels. Using our new techniques, we achieve state-of-the-art results in semi-supervised classification on MNIST, CIFAR-10 and SVHN. The generated images are of high quality as confirmed by a visual Turing test: our model generates MNIST samples that humans cannot distinguish from real data, and CIFAR-10 samples that yield a human error rate of 21.3\%. We also present ImageNet samples with unprecedented resolution and show that our methods enable the model to learn recognizable features of ImageNet classes.}, + archiveprefix = {arXiv}, + langid = {english}, + keywords = {Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning,Computer Science - Neural and Evolutionary Computing,Read}, + file = {/Users/hhakem/Zotero/storage/JBXY4G9A/Salimans et al. - 2016 - Improved Techniques for Training GANs.pdf} +} + +@article{sandfortDataAugmentationUsing2019, + title = {Data Augmentation Using Generative Adversarial Networks ({{CycleGAN}}) to Improve Generalizability in {{CT}} Segmentation Tasks}, + author = {Sandfort, Veit and Yan, Ke and Pickhardt, Perry J. and Summers, Ronald M.}, + year = {2019}, + month = nov, + journal = {Scientific Reports}, + volume = {9}, + number = {1}, + pages = {16884}, + publisher = {Nature Publishing Group}, + issn = {2045-2322}, + doi = {10.1038/s41598-019-52737-x}, + urldate = {2024-07-03}, + abstract = {Labeled medical imaging data is scarce and expensive to generate. To achieve generalizable deep learning models large amounts of data are needed. Standard data augmentation is a method to increase generalizability and is routinely performed. Generative adversarial networks offer a novel method for data augmentation. We evaluate the use of CycleGAN for data augmentation in CT segmentation tasks. Using a large image database we trained a CycleGAN to transform contrast CT images into non-contrast images. We then used the trained CycleGAN to augment our training using these synthetic non-contrast images. We compared the segmentation performance of a U-Net trained on the original dataset compared to a U-Net trained on the combined dataset of original data and synthetic non-contrast images. 
We further evaluated the U-Net segmentation performance on two separate datasets: The original contrast CT dataset on which segmentations were created and a second dataset from a different hospital containing only non-contrast CTs. We refer to these 2 separate datasets as the in-distribution and out-of-distribution datasets, respectively. We show that in several CT segmentation tasks performance is improved significantly, especially in out-of-distribution (noncontrast CT) data. For example, when training the model with standard augmentation techniques, performance of segmentation of the kidneys on out-of-distribution non-contrast images was dramatically lower than for in-distribution data (Dice score of 0.09 vs. 0.94 for out-of-distribution vs. in-distribution data, respectively, p\,{$<$}\,0.001). When the kidney model was trained with CycleGAN augmentation techniques, the out-of-distribution (non-contrast) performance increased dramatically (from a Dice score of 0.09 to 0.66, p\,{$<$}\,0.001). Improvements for the liver and spleen were smaller, from 0.86 to 0.89 and 0.65 to 0.69, respectively. We believe this method will be valuable to medical imaging researchers to reduce manual segmentation effort and cost in CT imaging.}, + copyright = {2019 This is a U.S. government work and not under copyright protection in the U.S.; foreign copyright protection may apply}, + langid = {english}, + keywords = {Diagnostic markers,Image processing,Read}, + file = {/Users/hhakem/Zotero/storage/QYUKMGVM/Sandfort et al. - 2019 - Data augmentation using generative adversarial net.pdf} +} + +@misc{saplakogluHowAIRevolutionized2024, + title = {How {{AI Revolutionized Protein Science}}, but {{Didn}}'t {{End It}}}, + author = {Saplakoglu, Yasemin}, + year = {2024}, + month = jun, + journal = {Quanta Magazine}, + urldate = {2024-07-15}, + abstract = {Three years ago, Google's AlphaFold pulled off the biggest artificial intelligence breakthrough in science to date, accelerating molecular research and kindling deep questions about why we do science.}, + howpublished = {https://www.quantamagazine.org/how-ai-revolutionized-protein-science-but-didnt-end-it-20240626/}, + langid = {english}, + file = {/Users/hhakem/Zotero/storage/2TIJJSWT/how-ai-revolutionized-protein-science-but-didnt-end-it-20240626.html} +} + +@misc{schrodCODEXCOunterfactualDeep2024, + title = {{{CODEX}}: {{COunterfactual Deep}} Learning for the in-Silico {{EXploration}} of Cancer Cell Line Perturbations}, + shorttitle = {{{CODEX}}}, + author = {Schrod, Stefan and Bei{\ss}barth, Tim and Zacharias, Helena U. and Hauschild, Anne-Christin and Altenbuchinger, Michael}, + year = {2024}, + month = jan, + primaryclass = {New Results}, + pages = {2024.01.24.577020}, + publisher = {bioRxiv}, + doi = {10.1101/2024.01.24.577020}, + urldate = {2024-07-18}, + abstract = {Motivation High-throughput screens (HTS) provide a powerful tool to decipher the causal effects of chemical and genetic perturbations on cancer cell lines. Their ability to evaluate a wide spectrum of interventions, from single drugs to intricate drug combinations and CRISPR-interference, has established them as an invaluable resource for the development of novel therapeutic approaches. Nevertheless, the combinatorial complexity of potential interventions makes a comprehensive exploration intractable. Hence, prioritizing interventions for further experimental investigation becomes of utmost importance. 
Results We propose CODEX as a general framework for the causal modeling of HTS data, linking perturbations to their downstream consequences. CODEX relies on a stringent causal modeling strategy based on counterfactual reasoning. As such, CODEX predicts drug-specific cellular responses, comprising cell survival and molecular alterations, and facilitates the in-silico exploration of drug combinations. This is achieved for both bulk and single-cell HTS. We further show that CODEX provides a rationale to explore complex genetic modifications from CRISPR-interference in silico in single cells. Availability and Implementation Our implementation of CODEX is publicly available at https://github.com/sschrod/CODEX. All data used in this article are publicly available.}, + archiveprefix = {bioRxiv}, + chapter = {New Results}, + copyright = {{\copyright} 2024, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution 4.0 International), CC BY 4.0, as described at http://creativecommons.org/licenses/by/4.0/}, + langid = {english}, + keywords = {UnRead}, + file = {/Users/hhakem/Zotero/storage/P3WV6G2X/Schrod et al. - 2024 - CODEX COunterfactual Deep learning for the in-sil.pdf} +} + +@misc{SegmentNTAnnotatingGenome, + title = {{{SegmentNT}}: Annotating the Genome at Single-Nucleotide Resolution with {{DNA}} Foundation Models {\textbar} {{bioRxiv}}}, + urldate = {2024-10-27}, + howpublished = {https://www.biorxiv.org/content/10.1101/2024.03.14.584712v2}, + file = {/Users/hhakem/Zotero/storage/VFEC9928/2024.03.14.html} +} + +@inproceedings{selvarajuGradCAMVisualExplanations2017, + title = {Grad-{{CAM}}: {{Visual Explanations}} from {{Deep Networks}} via {{Gradient-Based Localization}}}, + shorttitle = {Grad-{{CAM}}}, + booktitle = {2017 {{IEEE International Conference}} on {{Computer Vision}} ({{ICCV}})}, + author = {Selvaraju, Ramprasaath R. and Cogswell, Michael and Das, Abhishek and Vedantam, Ramakrishna and Parikh, Devi and Batra, Dhruv}, + year = {2017}, + month = oct, + pages = {618--626}, + issn = {2380-7504}, + doi = {10.1109/ICCV.2017.74}, + urldate = {2024-10-25}, + abstract = {We propose a technique for producing `visual explanations' for decisions from a large class of Convolutional Neural Network (CNN)-based models, making them more transparent. Our approach - Gradient-weighted Class Activation Mapping (Grad-CAM), uses the gradients of any target concept (say logits for `dog' or even a caption), flowing into the final convolutional layer to produce a coarse localization map highlighting the important regions in the image for predicting the concept. Unlike previous approaches, Grad- CAM is applicable to a wide variety of CNN model-families: (1) CNNs with fully-connected layers (e.g. VGG), (2) CNNs used for structured outputs (e.g. captioning), (3) CNNs used in tasks with multi-modal inputs (e.g. visual question answering) or reinforcement learning, without architectural changes or re-training. We combine Grad-CAM with existing fine-grained visualizations to create a high-resolution class-discriminative visualization, Guided Grad-CAM, and apply it to image classification, image captioning, and visual question answering (VQA) models, including ResNet-based architectures. 
In the context of image classification models, our visualizations (a) lend insights into failure modes of these models (showing that seemingly unreasonable predictions have reasonable explanations), (b) outperform previous methods on the ILSVRC-15 weakly-supervised localization task, (c) are more faithful to the underlying model, and (d) help achieve model generalization by identifying dataset bias. For image captioning and VQA, our visualizations show even non-attention based models can localize inputs. Finally, we design and conduct human studies to measure if Grad-CAM explanations help users establish appropriate trust in predictions from deep networks and show that Grad-CAM helps untrained users successfully discern a `stronger' deep network from a `weaker' one even when both make identical predictions. Our code is available at https: //github.com/ramprs/grad-cam/ along with a demo on CloudCV [2] and video at youtu.be/COjUB9Izk6E.}, + keywords = {Cats,Computer architecture,Dogs,Knowledge discovery,Visualization}, + file = {/Users/hhakem/Zotero/storage/G6V4AH4Q/Selvaraju et al. - 2017 - Grad-CAM Visual Explanations from Deep Networks v.pdf;/Users/hhakem/Zotero/storage/D2W9EZ3Z/8237336.html} +} + +@misc{shrikumarLearningImportantFeatures2019, + title = {Learning {{Important Features Through Propagating Activation Differences}}}, + author = {Shrikumar, Avanti and Greenside, Peyton and Kundaje, Anshul}, + year = {2019}, + month = oct, + number = {arXiv:1704.02685}, + eprint = {1704.02685}, + primaryclass = {cs}, + publisher = {arXiv}, + urldate = {2024-07-03}, + abstract = {The purported ``black box'' nature of neural networks is a barrier to adoption in applications where interpretability is essential. Here we present DeepLIFT (Deep Learning Important FeaTures), a method for decomposing the output prediction of a neural network on a specific input by backpropagating the contributions of all neurons in the network to every feature of the input. DeepLIFT compares the activation of each neuron to its `reference activation' and assigns contribution scores according to the difference. By optionally giving separate consideration to positive and negative contributions, DeepLIFT can also reveal dependencies which are missed by other approaches. Scores can be computed efficiently in a single backward pass. We apply DeepLIFT to models trained on MNIST and simulated genomic data, and show significant advantages over gradient-based methods. Video tutorial: http://goo.gl/qKb7pL, ICML slides: bit.ly/deeplifticmlslides, ICML talk: https://vimeo.com/238275076, code: http://goo.gl/RM8jvH.}, + archiveprefix = {arXiv}, + langid = {english}, + keywords = {Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning,Computer Science - Neural and Evolutionary Computing,Read}, + file = {/Users/hhakem/Zotero/storage/IB54PAHI/Shrikumar et al. - 2019 - Learning Important Features Through Propagating Ac.pdf} +} + +@article{SideEffectComputer2024, + title = {Side Effect (Computer Science)}, + year = {2024}, + month = jul, + journal = {Wikipedia}, + urldate = {2024-07-25}, + abstract = {In computer science, an operation, function or expression is said to have a side effect if it has any observable effect other than its primary effect of reading the value of its arguments and returning a value to the invoker of the operation. 
Example side effects include modifying a non-local variable, a static local variable or a mutable argument passed by reference; raising errors or exceptions; performing I/O; or calling other functions with side-effects. In the presence of side effects, a program's behaviour may depend on history; that is, the order of evaluation matters. Understanding and debugging a function with side effects requires knowledge about the context and its possible histories. Side effects play an important role in the design and analysis of programming languages. The degree to which side effects are used depends on the programming paradigm. For example, imperative programming is commonly used to produce side effects, to update a system's state. By contrast, declarative programming is commonly used to report on the state of system, without side effects. Functional programming aims to minimize or eliminate side effects. The lack of side effects makes it easier to do formal verification of a program. The functional language Haskell eliminates side effects such as I/O and other stateful computations by replacing them with monadic actions. Functional languages such as Standard ML, Scheme and Scala do not restrict side effects, but it is customary for programmers to avoid them. Assembly language programmers must be aware of hidden side effects---instructions that modify parts of the processor state which are not mentioned in the instruction's mnemonic. A classic example of a hidden side effect is an arithmetic instruction that implicitly modifies condition codes (a hidden side effect) while it explicitly modifies a register (the intended effect). One potential drawback of an instruction set with hidden side effects is that, if many instructions have side effects on a single piece of state, like condition codes, then the logic required to update that state sequentially may become a performance bottleneck. The problem is particularly acute on some processors designed with pipelining (since 1990) or with out-of-order execution. Such a processor may require additional control circuitry to detect hidden side effects and stall the pipeline if the next instruction depends on the results of those effects.}, + copyright = {Creative Commons Attribution-ShareAlike License}, + langid = {english}, + annotation = {Page Version ID: 1235041233}, + file = {/Users/hhakem/Zotero/storage/9EHH3NVE/Side_effect_(computer_science).html} +} + +@misc{simonInterPLMDiscoveringInterpretable2024, + title = {{{InterPLM}}: {{Discovering Interpretable Features}} in {{Protein Language Models}} via {{Sparse Autoencoders}}}, + shorttitle = {{{InterPLM}}}, + author = {Simon, Elana and Zou, James}, + year = {2024}, + month = nov, + primaryclass = {New Results}, + pages = {2024.11.14.623630}, + publisher = {bioRxiv}, + doi = {10.1101/2024.11.14.623630}, + urldate = {2024-11-24}, + abstract = {Protein language models (PLMs) have demonstrated remarkable success in protein modeling and design, yet their internal mechanisms for predicting structure and function remain poorly understood. Here we present a systematic approach to extract and analyze interpretable features from PLMs using sparse autoencoders (SAEs). By training SAEs on embeddings from the PLM ESM-2, we identify up to 2,548 human-interpretable latent features per layer that strongly correlate with up to 143 known biological concepts such as binding sites, structural motifs, and functional domains. 
In contrast, examining individual neurons in ESM-2 reveals up to 46 neurons per layer with clear conceptual alignment across 15 known concepts, suggesting that PLMs represent most concepts in superposition. Beyond capturing known annotations, we show that ESM-2 learns coherent concepts that do not map onto existing annotations and propose a pipeline using language models to automatically interpret novel latent features learned by the SAEs. As practical applications, we demonstrate how these latent features can fill in missing annotations in protein databases and enable targeted steering of protein sequence generation. Our results demonstrate that PLMs encode rich, interpretable representations of protein biology and we propose a systematic framework to extract and analyze these latent features. In the process, we recover both known biology and potentially new protein motifs. As community resources, we introduce InterPLM (interPLM.ai), an interactive visualization platform for exploring and analyzing learned PLM features, and release code for training and analysis at github.com/ElanaPearl/interPLM.}, + archiveprefix = {bioRxiv}, + chapter = {New Results}, + copyright = {{\copyright} 2024, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution-NonCommercial 4.0 International), CC BY-NC 4.0, as described at http://creativecommons.org/licenses/by-nc/4.0/}, + langid = {english} +} + +@misc{songDenoisingDiffusionImplicit2022, + title = {Denoising {{Diffusion Implicit Models}}}, + author = {Song, Jiaming and Meng, Chenlin and Ermon, Stefano}, + year = {2022}, + month = oct, + number = {arXiv:2010.02502}, + eprint = {2010.02502}, + primaryclass = {cs}, + publisher = {arXiv}, + urldate = {2024-10-16}, + abstract = {Denoising diffusion probabilistic models (DDPMs) have achieved high quality image generation without adversarial training, yet they require simulating a Markov chain for many steps in order to produce a sample. To accelerate sampling, we present denoising diffusion implicit models (DDIMs), a more efficient class of iterative implicit probabilistic models with the same training procedure as DDPMs. In DDPMs, the generative process is defined as the reverse of a particular Markovian diffusion process. We generalize DDPMs via a class of non-Markovian diffusion processes that lead to the same training objective. These non-Markovian processes can correspond to generative processes that are deterministic, giving rise to implicit models that produce high quality samples much faster. We empirically demonstrate that DDIMs can produce high quality samples 10{\texttimes} to 50{\texttimes} faster in terms of wall-clock time compared to DDPMs, allow us to trade off computation for sample quality, perform semantically meaningful image interpolation directly in the latent space, and reconstruct observations with very low error.}, + archiveprefix = {arXiv}, + langid = {english}, + keywords = {Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning}, + file = {/Users/hhakem/Zotero/storage/ZE8TTHFB/Song et al. - 2022 - Denoising Diffusion Implicit Models.pdf} +} + +@article{sturmaUnpairedMultiDomainCausal, + title = {Unpaired {{Multi-Domain Causal Representation Learning}}}, + author = {Sturma, Nils and Drton, Mathias and Squires, Chandler and Uhler, Caroline}, + abstract = {The goal of causal representation learning is to find a representation of data that consists of causally related latent variables. 
We consider a setup where one has access to data from multiple domains that potentially share a causal representation. Crucially, observations in different domains are assumed to be unpaired, that is, we only observe the marginal distribution in each domain but not their joint distribution. In this paper, we give sufficient conditions for identifiability of the joint distribution and the shared causal graph in a linear setup. Identifiability holds if we can uniquely recover the joint distribution and the shared causal representation from the marginal distributions in each domain. We transform our results into a practical method to recover the shared latent causal graph.}, + langid = {english}, + file = {/Users/hhakem/Zotero/storage/5J5BWB5T/Sturma et al. - Unpaired Multi-Domain Causal Representation Learni.pdf} +} + +@article{sturmfelsVisualizingImpactFeature2020, + title = {Visualizing the {{Impact}} of {{Feature Attribution Baselines}}}, + author = {Sturmfels, Pascal and Lundberg, Scott and Lee, Su-In}, + year = {2020}, + month = jan, + journal = {Distill}, + volume = {5}, + number = {1}, + pages = {e22}, + issn = {2476-0757}, + doi = {10.23915/distill.00022}, + urldate = {2024-08-29}, + abstract = {Exploring the baseline input hyperparameter, and how it impacts interpretations of neural network behavior.}, + langid = {english}, + file = {/Users/hhakem/Zotero/storage/9L29RCZE/attribution-baselines.html} +} + +@article{sturmfelsVisualizingImpactFeature2020a, + title = {Visualizing the {{Impact}} of {{Feature Attribution Baselines}}}, + author = {Sturmfels, Pascal and Lundberg, Scott and Lee, Su-In}, + year = {2020}, + month = jan, + journal = {Distill}, + volume = {5}, + number = {1}, + pages = {e22}, + issn = {2476-0757}, + doi = {10.23915/distill.00022}, + urldate = {2024-10-10}, + abstract = {Exploring the baseline input hyperparameter, and how it impacts interpretations of neural network behavior.}, + langid = {english}, + file = {/Users/hhakem/Zotero/storage/42TS74N5/attribution-baselines.html} +} + +@article{sumidaImprovingProteinExpression2024, + title = {Improving {{Protein Expression}}, {{Stability}}, and {{Function}} with {{ProteinMPNN}}}, + author = {Sumida, Kiera H. and {N{\'u}{\~n}ez-Franco}, Reyes and Kalvet, Indrek and Pellock, Samuel J. and Wicky, Basile I. M. and Milles, Lukas F. and Dauparas, Justas and Wang, Jue and Kipnis, Yakov and Jameson, Noel and Kang, Alex and De La Cruz, Joshmyn and Sankaran, Banumathi and Bera, Asim K. and {Jim{\'e}nez-Os{\'e}s}, Gonzalo and Baker, David}, + year = {2024}, + month = jan, + journal = {Journal of the American Chemical Society}, + volume = {146}, + number = {3}, + pages = {2054--2061}, + publisher = {American Chemical Society}, + issn = {0002-7863}, + doi = {10.1021/jacs.3c10941}, + urldate = {2024-11-17}, + abstract = {Natural proteins are highly optimized for function but are often difficult to produce at a scale suitable for biotechnological applications due to poor expression in heterologous systems, limited solubility, and sensitivity to temperature. Thus, a general method that improves the physical properties of native proteins while maintaining function could have wide utility for protein-based technologies. Here, we show that the deep neural network ProteinMPNN, together with evolutionary and structural information, provides a route to increasing protein expression, stability, and function. 
For both myoglobin and tobacco etch virus (TEV) protease, we generated designs with improved expression, elevated melting temperatures, and improved function. For TEV protease, we identified multiple designs with improved catalytic activity as compared to the parent sequence and previously reported TEV variants. Our approach should be broadly useful for improving the expression, stability, and function of biotechnologically important proteins.}, + file = {/Users/hhakem/Zotero/storage/5KKXFNTF/Sumida et al. - 2024 - Improving Protein Expression, Stability, and Funct.pdf} +} + +@misc{tahaTransfoRNANavigatingUncertainties2024, + title = {{{TransfoRNA}}: {{Navigating}} the {{Uncertainties}} of {{Small RNA Annotation}} with an {{Adaptive Machine Learning Strategy}}}, + shorttitle = {{{TransfoRNA}}}, + author = {Taha, Yasser and Jehn, Julia and Kahraman, Mustafa and Frank, Maurice and Heuvelman, Marco and Horos, Rastislav and Yau, Christopher and Steinkraus, Bruno and Sikosek, Tobias}, + year = {2024}, + month = jun, + primaryclass = {New Results}, + pages = {2024.06.19.599329}, + publisher = {bioRxiv}, + doi = {10.1101/2024.06.19.599329}, + urldate = {2024-11-04}, + abstract = {Small RNAs hold crucial biological information and have immense diagnostic and therapeutic value. While many established annotation tools focus on microRNAs, there are myriads of other small RNAs that are currently underutilized. These small RNAs can be difficult to annotate, as ground truth is limited and well-established mapping and mismatch rules are lacking. TransfoRNA is a machine learning framework based on Transformers that explores an alternative strategy. It uses common annotation tools to generate a small seed of high-confidence training labels, while then expanding upon those labels iteratively. TransfoRNA learns sequence-specific representations of all RNAs to construct a similarity network which can be interrogated as new RNAs are annotated, allowing to rank RNAs based on their familiarity. While models can be flexibly trained on any RNA dataset, we here present a version trained on TCGA (The Cancer Genome Atlas) small RNA sequences and demonstrate its ability to add annotation confidence to an unrelated dataset, where 21\% of previously unannotated RNAs could be annotated. Relative to its training data, TransfoRNA could boost high-confidence annotations in TCGA by {$\sim$}50\% while providing transparent explanations even for low-confidence ones. It could learn to annotate 97\% of isomiRs from just single examples and confidently identify new members of other familiar classes with high accuracy, while reliably rejecting false RNAs. All source code is available at https://github.com/gitHBDX/TransfoRNA and can be executed at Code Ocean (https://codeocean.com/capsule/5415298/). An interactive website is available at www.transforna.com.}, + archiveprefix = {bioRxiv}, + chapter = {New Results}, + copyright = {{\copyright} 2024, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution-NonCommercial-NoDerivs 4.0 International), CC BY-NC-ND 4.0, as described at http://creativecommons.org/licenses/by-nc-nd/4.0/}, + langid = {english}, + file = {/Users/hhakem/Zotero/storage/VR3SVYVK/Taha et al. 
- 2024 - TransfoRNA Navigating the Uncertainties of Small .pdf} +} + +@misc{Teaching, + title = {Teaching}, + urldate = {2024-10-20}, + howpublished = {https://stanford.edu/{\textasciitilde}shervine/teaching/}, + file = {/Users/hhakem/Zotero/storage/JJWGCX9C/teaching.html} +} + +@misc{telfordNewBreedBiotech2024, + title = {A New Breed of Biotech}, + author = {Telford, Alex}, + year = {2024}, + month = sep, + journal = {Alex's blog}, + urldate = {2024-08-20}, + abstract = {Personal site for posts about my interests: the biotech industry, medicine, molecular biology, neuroscience, biorisk, science, consciousness, AI, innovation, decision making, philosophy, games, sci-fi, probability, and forecasting (among other things). I write to learn, mostly about biotech.}, + howpublished = {https://atelfo.github.io/2024/09/17/a-new-breed-of-biotech.html}, + langid = {english}, + file = {/Users/hhakem/Zotero/storage/AX7BN7YS/a-new-breed-of-biotech.html} +} + +@misc{tianOCT2Confocal3DCycleGAN2024, + title = {{{OCT2Confocal}}: {{3D CycleGAN}} Based {{Translation}} of {{Retinal OCT Images}} to {{Confocal Microscopy}}}, + shorttitle = {{{OCT2Confocal}}}, + author = {Tian, Xin and Anantrasirichai, Nantheera and Nicholson, Lindsay and Achim, Alin}, + year = {2024}, + month = feb, + number = {arXiv:2311.10902}, + eprint = {2311.10902}, + primaryclass = {cs, eess}, + publisher = {arXiv}, + urldate = {2024-07-09}, + abstract = {Optical coherence tomography (OCT) and confocal microscopy are pivotal in retinal imaging, each presenting unique benefits and limitations. In-vivo OCT offers rapid, non-invasive imaging but can be hampered by clarity issues and motion artifacts. Ex-vivo confocal microscopy provides high-resolution, cellular detailed color images but is invasive and poses ethical concerns and potential tissue damage. To bridge these modalities, we developed a 3D CycleGAN framework for unsupervised translation of in-vivo OCT to ex-vivo confocal microscopy images. Applied to our OCT2Confocal dataset, this framework effectively translates between 3D medical data domains, capturing vascular, textural, and cellular details with precision. This marks the first attempt to exploit the inherent 3D information of OCT and translate it into the rich, detailed color domain of confocal microscopy. Assessed through quantitative and qualitative evaluations, the 3D CycleGAN framework demonstrates commendable image fidelity and quality, outperforming existing methods despite the constraints of limited data. This non-invasive generation of retinal confocal images has the potential to further enhance diagnostic and monitoring capabilities in ophthalmology. Our source code and OCT2Confocal dataset are available at https: //github.com/xintian-99/OCT2Confocal.}, + archiveprefix = {arXiv}, + langid = {english}, + keywords = {Computer Science - Computer Vision and Pattern Recognition,Electrical Engineering and Systems Science - Image and Video Processing,UnRead}, + file = {/Users/hhakem/Zotero/storage/FZUGDR6L/Tian et al. 
- 2024 - OCT2Confocal 3D CycleGAN based Translation of Ret.pdf} +} + +@misc{tkachenkoEvaluatingDatabaseCompression2016, + title = {Evaluating {{Database Compression Methods}}: {{Update}}}, + shorttitle = {Evaluating {{Database Compression Methods}}}, + author = {Tkachenko, Vadim}, + year = {2016}, + month = apr, + journal = {Percona Database Performance Blog}, + urldate = {2024-11-25}, + abstract = {This blog post is an update to our last post discussing database compression methods, and how they stack up against each other.}, + langid = {american}, + file = {/Users/hhakem/Zotero/storage/QPX8UFHQ/evaluating-database-compression-methods-update.html} +} + +@article{uhlerMachineLearningApproaches2022, + title = {Machine {{Learning Approaches}} to {{Single-Cell Data Integration}} and {{Translation}}}, + author = {Uhler, Caroline and Shivashankar, G. V.}, + year = {2022}, + month = may, + journal = {Proceedings of the IEEE}, + volume = {110}, + number = {5}, + pages = {557--576}, + issn = {1558-2256}, + doi = {10.1109/JPROC.2022.3166132}, + urldate = {2024-09-23}, + abstract = {Experimental single-cell data often presents an incomplete picture due to its destructive nature: 1) we collect certain experimental measurements of cells but lack measurements under different experimental conditions or data modalities; 2) we collect data of cells at certain time points but lack measurements at other time points; or 3) we collect data of cells under certain perturbations but lack data for other types of perturbations. In this article, we will discuss machine learning approaches to address these types of translation and counterfactual problems. We will begin by giving an overview on single-cell biology applications and the relevant translation problems. Subsequently, we will provide an overview of approaches for multidomain alignment and translation in machine learning, including methods based on generative modeling, optimal transport, and causal inference. The bulk of this article will focus on how these approaches have been tailored and applied to important translation problems in single-cell biology, illustrated through concrete examples from our own work. We end with open problems and a perspective on how biology may not only be uniquely suited to being one of the greatest beneficiaries of machine learning but also one of the greatest sources of inspiration for it.}, + keywords = {Biology,Causality,Cells (biology),deep learning,DNA,generative modeling,genome organization,imaging,Imaging,Machine learning,optimal transport,Perturbation methods,representation learning,sequencing,Sequential analysis,single-cell biology,spatial transcriptomics,Synthetic biology}, + file = {/Users/hhakem/Zotero/storage/FWTUNX6J/Uhler and Shivashankar - 2022 - Machine Learning Approaches to Single-Cell Data In.pdf;/Users/hhakem/Zotero/storage/YLZ7XGJQ/9762556.html} +} + +@misc{vaswaniAttentionAllYou2023, + title = {Attention {{Is All You Need}}}, + author = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N. and Kaiser, Lukasz and Polosukhin, Illia}, + year = {2023}, + month = aug, + number = {arXiv:1706.03762}, + eprint = {1706.03762}, + primaryclass = {cs}, + publisher = {arXiv}, + doi = {10.48550/arXiv.1706.03762}, + urldate = {2024-10-19}, + abstract = {The dominant sequence transduction models are based on complex recurrent or convolutional neural networks in an encoder-decoder configuration. 
The best performing models also connect the encoder and decoder through an attention mechanism. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence and convolutions entirely. Experiments on two machine translation tasks show these models to be superior in quality while being more parallelizable and requiring significantly less time to train. Our model achieves 28.4 BLEU on the WMT 2014 English-to-German translation task, improving over the existing best results, including ensembles by over 2 BLEU. On the WMT 2014 English-to-French translation task, our model establishes a new single-model state-of-the-art BLEU score of 41.8 after training for 3.5 days on eight GPUs, a small fraction of the training costs of the best models from the literature. We show that the Transformer generalizes well to other tasks by applying it successfully to English constituency parsing both with large and limited training data.}, + archiveprefix = {arXiv}, + keywords = {Computer Science - Computation and Language,Computer Science - Machine Learning}, + file = {/Users/hhakem/Zotero/storage/DR85Q63J/Vaswani et al. - 2023 - Attention Is All You Need.pdf;/Users/hhakem/Zotero/storage/KAMW7I26/1706.html} +} + +@misc{wangAdaptiveWingLoss2020, + title = {Adaptive {{Wing Loss}} for {{Robust Face Alignment}} via {{Heatmap Regression}}}, + author = {Wang, Xinyao and Bo, Liefeng and Fuxin, Li}, + year = {2020}, + month = may, + number = {arXiv:1904.07399}, + eprint = {1904.07399}, + primaryclass = {cs}, + publisher = {arXiv}, + urldate = {2024-10-08}, + abstract = {Heatmap regression with a deep network has become one of the mainstream approaches to localize facial landmarks. However, the loss function for heatmap regression is rarely studied. In this paper, we analyze the ideal loss function properties for heatmap regression in face alignment problems. Then we propose a novel loss function, named Adaptive Wing loss, that is able to adapt its shape to different types of ground truth heatmap pixels. This adaptability penalizes loss more on foreground pixels while less on background pixels. To address the imbalance between foreground and background pixels, we also propose Weighted Loss Map, which assigns high weights on foreground and difficult background pixels to help training process focus more on pixels that are crucial to landmark localization. To further improve face alignment accuracy, we introduce boundary prediction and CoordConv with boundary coordinates. Extensive experiments on different benchmarks, including COFW, 300W and WFLW, show our approach outperforms the state-of-the-art by a significant margin on various evaluation metrics. Besides, the Adaptive Wing loss also helps other heatmap regression tasks. Code will be made publicly available at https://github.com/ protossw512/AdaptiveWingLoss.}, + archiveprefix = {arXiv}, + langid = {english}, + keywords = {Computer Science - Computer Vision and Pattern Recognition}, + file = {/Users/hhakem/Zotero/storage/LCB45T2B/Wang et al. 
- 2020 - Adaptive Wing Loss for Robust Face Alignment via H.pdf} +} + +@misc{wangRemovingBiasesMolecular2023, + title = {Removing {{Biases}} from {{Molecular Representations}} via {{Information Maximization}}}, + author = {Wang, Chenyu and Gupta, Sharut and Uhler, Caroline and Jaakkola, Tommi}, + year = {2023}, + month = dec, + number = {arXiv:2312.00718}, + eprint = {2312.00718}, + primaryclass = {cs, q-bio}, + publisher = {arXiv}, + urldate = {2024-09-23}, + abstract = {High-throughput drug screening -- using cell imaging or gene expression measurements as readouts of drug effect -- is a critical tool in biotechnology to assess and understand the relationship between the chemical structure and biological activity of a drug. Since large-scale screens have to be divided into multiple experiments, a key difficulty is dealing with batch effects, which can introduce systematic errors and non-biological associations in the data. We propose InfoCORE, an Information maximization approach for COnfounder REmoval, to effectively deal with batch effects and obtain refined molecular representations. InfoCORE establishes a variational lower bound on the conditional mutual information of the latent representations given a batch identifier. It adaptively reweighs samples to equalize their implied batch distribution. Extensive experiments on drug screening data reveal InfoCORE's superior performance in a multitude of tasks including molecular property prediction and molecule-phenotype retrieval. Additionally, we show results for how InfoCORE offers a versatile framework and resolves general distribution shifts and issues of data fairness by minimizing correlation with spurious features or removing sensitive attributes. The code is available at https://github.com/uhlerlab/InfoCORE.}, + archiveprefix = {arXiv}, + langid = {english}, + keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning,Quantitative Biology - Biomolecules}, + file = {/Users/hhakem/Zotero/storage/UN6TKI24/Wang et al. - 2023 - Removing Biases from Molecular Representations via.pdf} +} + +@article{wangSpatialPredictorsImmunotherapy2023, + title = {Spatial Predictors of Immunotherapy Response in Triple-Negative Breast Cancer}, + author = {Wang, Xiao Qian and Danenberg, Esther and Huang, Chiun-Sheng and Egle, Daniel and Callari, Maurizio and Bermejo, Bego{\~n}a and Dugo, Matteo and Zamagni, Claudio and Thill, Marc and Anton, Anton and Zambelli, Stefania and Russo, Stefania and Ciruelos, Eva Maria and Greil, Richard and Gy{\H o}rffy, Bal{\'a}zs and Semiglazov, Vladimir and Colleoni, Marco and Kelly, Catherine M. and Mariani, Gabriella and Del Mastro, Lucia and Biasi, Olivia and Seitz, Robert S. and Valagussa, Pinuccia and Viale, Giuseppe and Gianni, Luca and Bianchini, Giampaolo and Ali, H. Raza}, + year = {2023}, + month = sep, + journal = {Nature}, + volume = {621}, + number = {7980}, + pages = {868--876}, + publisher = {Nature Publishing Group}, + issn = {1476-4687}, + doi = {10.1038/s41586-023-06498-3}, + urldate = {2024-10-27}, + abstract = {Immune checkpoint blockade (ICB) benefits some patients with triple-negative breast cancer, but what distinguishes responders from non-responders is unclear1. Because ICB targets cell--cell interactions2, we investigated the impact of multicellular spatial organization on response, and explored how ICB remodels the tumour microenvironment. 
We show that cell phenotype, activation state and spatial location are intimately linked, influence ICB effect and differ in sensitive versus resistant tumours early on-treatment. We used imaging mass cytometry3 to profile the in situ expression of 43 proteins in tumours from patients in a randomized trial of neoadjuvant ICB, sampled at three timepoints (baseline, n\,=\,243; early on-treatment, n\,=\,207; post-treatment, n\,=\,210). Multivariate modelling showed that the fractions of proliferating CD8+TCF1+T cells and MHCII+ cancer cells were dominant predictors of response, followed by cancer--immune interactions with B cells and granzyme B+ T cells. On-treatment, responsive tumours contained abundant granzyme B+ T cells, whereas resistant tumours were characterized by CD15+ cancer cells. Response was best predicted by combining tissue features before and on-treatment, pointing to a role for early biopsies in guiding adaptive therapy. Our findings show that multicellular spatial organization is a major determinant of ICB effect and suggest that its systematic enumeration in situ could help realize precision immuno-oncology.}, + copyright = {2023 The Author(s)}, + langid = {english}, + keywords = {Breast cancer,Immunotherapy,Tumour biomarkers}, + file = {/Users/hhakem/Zotero/storage/DVNWTN3A/Wang et al. - 2023 - Spatial predictors of immunotherapy response in tr.pdf} +} + +@article{watsonNovoDesignProtein2023, + title = {De Novo Design of Protein Structure and Function with {{RFdiffusion}}}, + author = {Watson, Joseph L. and Juergens, David and Bennett, Nathaniel R. and Trippe, Brian L. and Yim, Jason and Eisenach, Helen E. and Ahern, Woody and Borst, Andrew J. and Ragotte, Robert J. and Milles, Lukas F. and Wicky, Basile I. M. and Hanikel, Nikita and Pellock, Samuel J. and Courbet, Alexis and Sheffler, William and Wang, Jue and Venkatesh, Preetham and Sappington, Isaac and Torres, Susana V{\'a}zquez and Lauko, Anna and De Bortoli, Valentin and Mathieu, Emile and Ovchinnikov, Sergey and Barzilay, Regina and Jaakkola, Tommi S. and DiMaio, Frank and Baek, Minkyung and Baker, David}, + year = {2023}, + month = aug, + journal = {Nature}, + volume = {620}, + number = {7976}, + pages = {1089--1100}, + publisher = {Nature Publishing Group}, + issn = {1476-4687}, + doi = {10.1038/s41586-023-06415-8}, + urldate = {2024-11-17}, + abstract = {There has been considerable recent progress in designing new proteins using deep-learning methods1--9. Despite this progress, a general deep-learning framework for protein design that enables solution of a wide range of design challenges, including de novo binder design and design of higher-order symmetric architectures, has yet to be described. Diffusion models10,11 have had considerable success in image and language generative modelling but limited success when applied to protein modelling, probably due to the complexity of protein backbone geometry and sequence--structure relationships. Here we show that by fine-tuning the RoseTTAFold structure prediction network on protein structure denoising tasks, we obtain a generative model of protein backbones that achieves outstanding performance on unconditional and topology-constrained protein monomer design, protein binder design, symmetric oligomer design, enzyme active site scaffolding and symmetric motif scaffolding for therapeutic and metal-binding protein design. 
We demonstrate the power and generality of the method, called RoseTTAFold diffusion (RFdiffusion), by experimentally characterizing the structures and functions of hundreds of designed symmetric assemblies, metal-binding proteins and protein binders. The accuracy of RFdiffusion is confirmed by the cryogenic electron microscopy structure of a designed binder in complex with influenza haemagglutinin that is nearly identical to the design model. In a manner analogous to networks that produce images from user-specified inputs, RFdiffusion enables the design of diverse functional proteins from simple molecular specifications.}, + copyright = {2023 The Author(s)}, + langid = {english}, + keywords = {Machine learning,Protein design,Proteins}, + file = {/Users/hhakem/Zotero/storage/TGUGFNGE/Watson et al. - 2023 - De novo design of protein structure and function w.pdf} +} + +@article{watsonNovoDesignProtein2023a, + title = {De Novo Design of Protein Structure and Function with {{RFdiffusion}}}, + author = {Watson, Joseph L. and Juergens, David and Bennett, Nathaniel R. and Trippe, Brian L. and Yim, Jason and Eisenach, Helen E. and Ahern, Woody and Borst, Andrew J. and Ragotte, Robert J. and Milles, Lukas F. and Wicky, Basile I. M. and Hanikel, Nikita and Pellock, Samuel J. and Courbet, Alexis and Sheffler, William and Wang, Jue and Venkatesh, Preetham and Sappington, Isaac and Torres, Susana V{\'a}zquez and Lauko, Anna and De Bortoli, Valentin and Mathieu, Emile and Ovchinnikov, Sergey and Barzilay, Regina and Jaakkola, Tommi S. and DiMaio, Frank and Baek, Minkyung and Baker, David}, + year = {2023}, + month = aug, + journal = {Nature}, + volume = {620}, + number = {7976}, + pages = {1089--1100}, + publisher = {Nature Publishing Group}, + issn = {1476-4687}, + doi = {10.1038/s41586-023-06415-8}, + urldate = {2024-12-08}, + abstract = {There has been considerable recent progress in designing new proteins using deep-learning methods1--9. Despite this progress, a general deep-learning framework for protein design that enables solution of a wide range of design challenges, including de novo binder design and design of higher-order symmetric architectures, has yet to be described. Diffusion models10,11 have had considerable success in image and language generative modelling but limited success when applied to protein modelling, probably due to the complexity of protein backbone geometry and sequence--structure relationships. Here we show that by fine-tuning the RoseTTAFold structure prediction network on protein structure denoising tasks, we obtain a generative model of protein backbones that achieves outstanding performance on unconditional and topology-constrained protein monomer design, protein binder design, symmetric oligomer design, enzyme active site scaffolding and symmetric motif scaffolding for therapeutic and metal-binding protein design. We demonstrate the power and generality of the method, called RoseTTAFold diffusion (RFdiffusion), by experimentally characterizing the structures and functions of hundreds of designed symmetric assemblies, metal-binding proteins and protein binders. The accuracy of RFdiffusion is confirmed by the cryogenic electron microscopy structure of a designed binder in complex with influenza haemagglutinin that is nearly identical to the design model. 
In a manner analogous to networks that produce images from user-specified inputs, RFdiffusion enables the design of diverse functional proteins from simple molecular specifications.}, + copyright = {2023 The Author(s)}, + langid = {english}, + keywords = {Machine learning,Protein design,Proteins}, + file = {/Users/hhakem/Zotero/storage/I3XP6JRL/Watson et al. - 2023 - De novo design of protein structure and function w.pdf} +} + +@article{wengEnergeticAllostericLandscape2024, + title = {The Energetic and Allosteric Landscape for {{KRAS}} Inhibition}, + author = {Weng, Chenchun and Faure, Andre J. and Escobedo, Albert and Lehner, Ben}, + year = {2024}, + month = feb, + journal = {Nature}, + volume = {626}, + number = {7999}, + pages = {643--652}, + publisher = {Nature Publishing Group}, + issn = {1476-4687}, + doi = {10.1038/s41586-023-06954-0}, + urldate = {2024-11-18}, + abstract = {Thousands of proteins have been validated genetically as therapeutic targets for human diseases1. However, very few have been successfully targeted, and many are considered `undruggable'. This is particularly true for proteins that function via protein--protein interactions---direct inhibition of binding interfaces is difficult and requires the identification of allosteric sites. However, most proteins have no known allosteric sites, and a comprehensive allosteric map does not exist for any protein. Here we address this shortcoming by charting multiple global atlases of inhibitory allosteric communication in KRAS. We quantified the effects of more than 26,000 mutations on the folding of KRAS and its binding to six interaction partners. Genetic interactions in double mutants enabled us to perform biophysical measurements at scale, inferring more than 22,000 causal free energy changes. These energy landscapes quantify how mutations tune the binding specificity of a signalling protein and map the inhibitory allosteric sites for an important therapeutic target. Allosteric propagation is particularly effective across the central {$\beta$}-sheet of KRAS, and multiple surface pockets are genetically validated as allosterically active, including a distal pocket in the C-terminal lobe of the protein. Allosteric mutations typically inhibit binding to all tested effectors, but they can also change the binding specificity, revealing the regulatory, evolutionary and therapeutic potential to tune pathway activation. Using the approach described here, it should be possible to rapidly and comprehensively identify allosteric target sites in many proteins.}, + copyright = {2023 The Author(s)}, + langid = {english}, + keywords = {Biophysics,Computational biology and bioinformatics,Drug discovery,Genomics,Systems biology}, + file = {/Users/hhakem/Zotero/storage/TP5BH4RG/Weng et al. - 2024 - The energetic and allosteric landscape for KRAS in.pdf} +} + +@misc{WhatLeidenClustering, + title = {What Is {{Leiden Clustering}} in {{Network Analysis}} - {{Dgraph Blog}}}, + urldate = {2024-10-24}, + abstract = {Discover the power of Leiden clustering for network analysis. Identify communities within complex networks with precision and speed.}, + howpublished = {https://dgraph.io/blog/post/leiden-clustering/}, + file = {/Users/hhakem/Zotero/storage/FMIBWELU/leiden-clustering.html} +} + +@article{wickyHallucinatingSymmetricProtein2022, + title = {Hallucinating Symmetric Protein Assemblies}, + author = {Wicky, B. I. M. and Milles, L. F. and Courbet, A. and Ragotte, R. J. and Dauparas, J. and Kinfu, E. and Tipps, S. and Kibler, R. D. 
and Baek, M. and DiMaio, F. and Li, X. and Carter, L. and Kang, A. and Nguyen, H. and Bera, A. K. and Baker, D.}, + year = {2022}, + month = oct, + journal = {Science}, + volume = {378}, + number = {6615}, + pages = {56--61}, + publisher = {American Association for the Advancement of Science}, + doi = {10.1126/science.add1964}, + urldate = {2024-11-16}, + abstract = {Deep learning generative approaches provide an opportunity to broadly explore protein structure space beyond the sequences and structures of natural proteins. Here, we use deep network hallucination to generate a wide range of symmetric protein homo-oligomers given only a specification of the number of protomers and the protomer length. Crystal structures of seven designs are very similar to the computational models (median root mean square deviation: 0.6 angstroms), as are three cryo--electron microscopy structures of giant 10-nanometer rings with up to 1550 residues and C33 symmetry; all differ considerably from previously solved structures. Our results highlight the rich diversity of new protein structures that can be generated using deep learning and pave the way for the design of increasingly complex components for nanomachines and biomaterials.}, + file = {/Users/hhakem/Zotero/storage/6UU5667M/Wicky et al. - 2022 - Hallucinating symmetric protein assemblies.pdf} +} + +@misc{xuReverseImageRetrieval2024, + title = {Reverse {{Image Retrieval Cues Parametric Memory}} in {{Multimodal LLMs}}}, + author = {Xu, Jialiang and Moor, Michael and Leskovec, Jure}, + year = {2024}, + month = may, + number = {arXiv:2405.18740}, + eprint = {2405.18740}, + publisher = {arXiv}, + urldate = {2024-11-02}, + abstract = {Despite impressive advances in recent multimodal large language models (MLLMs), state-of-the-art models such as from the GPT-4 suite still struggle with knowledge-intensive tasks. To address this, we consider Reverse Image Retrieval (RIR) augmented generation, a simple yet effective strategy to augment MLLMs with web-scale reverse image search results. RIR robustly improves knowledge-intensive visual question answering (VQA) of GPT-4V by 37-43\%, GPT-4 Turbo by 25-27\%, and GPT-4o by 18-20\% in terms of open-ended VQA evaluation metrics. To our surprise, we discover that RIR helps the model to better access its own world knowledge. Concretely, our experiments suggest that RIR augmentation helps by providing further visual and textual cues without necessarily containing the direct answer to a query. In addition, we elucidate cases in which RIR can hurt performance and conduct a human evaluation. Finally, we find that the overall advantage of using RIR makes it difficult for an agent that can choose to use RIR to perform better than an approach where RIR is the default setting.}, + archiveprefix = {arXiv}, + keywords = {Computer Science - Computation and Language}, + file = {/Users/hhakem/Zotero/storage/KSEX7ICN/Xu et al. - 2024 - Reverse Image Retrieval Cues Parametric Memory in .pdf;/Users/hhakem/Zotero/storage/ZCKDARAJ/2405.html} +} + +@misc{yangMultiDomainTranslationLearning2019, + title = {Multi-{{Domain Translation}} by {{Learning Uncoupled Autoencoders}}}, + author = {Yang, Karren D. 
and Uhler, Caroline}, + year = {2019}, + month = feb, + number = {arXiv:1902.03515}, + eprint = {1902.03515}, + primaryclass = {cs, stat}, + publisher = {arXiv}, + urldate = {2024-09-23}, + abstract = {Multi-domain translation seeks to learn a probabilistic coupling between marginal distributions that reflects the correspondence between different domains. We assume that data from different domains are generated from a shared latent representation based on a structural equation model. Under this assumption, we prove that the problem of computing a probabilistic coupling between marginals is equivalent to learning multiple uncoupled autoencoders that embed to a given shared latent distribution. In addition, we propose a new framework and algorithm for multi-domain translation based on learning the shared latent distribution and training autoencoders under distributional constraints. A key practical advantage of our framework is that new autoencoders (i.e., new domains) can be added sequentially to the model without retraining on the other domains, which we demonstrate experimentally on image as well as genomics datasets.}, + archiveprefix = {arXiv}, + langid = {english}, + keywords = {68T01,Computer Science - Machine Learning,Statistics - Machine Learning}, + file = {/Users/hhakem/Zotero/storage/TWKXQU77/Yang and Uhler - 2019 - Multi-Domain Translation by Learning Uncoupled Aut.pdf} +} + +@article{yaziciUNUSUALEFFECTIVENESSAVERAGING2019, + title = {{{THE UNUSUAL EFFECTIVENESS OF AVERAGING IN GAN TRAINING}}}, + author = {Yaz{\i}c{\i}, Yasin and Foo, Chuan-Sheng and Winkler, Stefan and Yap, Kim-Hui and Piliouras, Georgios and Chandrasekhar, Vijay}, + year = {2019}, + langid = {english}, + file = {/Users/hhakem/Zotero/storage/YIRKKXUA/Yazıcı et al. - 2019 - THE UNUSUAL EFFECTIVENESS OF AVERAGING IN GAN TRAI.pdf} +} + +@article{Zfactor2023, + title = {Z-Factor}, + year = {2023}, + month = dec, + journal = {Wikipedia}, + urldate = {2024-07-08}, + abstract = {The Z-factor is a measure of statistical effect size. It has been proposed for use in high-throughput screening (HTS), where it is also known as Z-prime, to judge whether the response in a particular assay is large enough to warrant further attention.}, + copyright = {Creative Commons Attribution-ShareAlike License}, + langid = {english}, + keywords = {Read}, + annotation = {Page Version ID: 1190674560}, + file = {/Users/hhakem/Zotero/storage/IVB3KNZJ/Z-factor.html} +} + +@misc{zhangIdentifiabilityGuaranteesCausal2023, + title = {Identifiability {{Guarantees}} for {{Causal Disentanglement}} from {{Soft Interventions}}}, + author = {Zhang, Jiaqi and Squires, Chandler and Greenewald, Kristjan and Srivastava, Akash and Shanmugam, Karthikeyan and Uhler, Caroline}, + year = {2023}, + month = nov, + number = {arXiv:2307.06250}, + eprint = {2307.06250}, + primaryclass = {cs, math, stat}, + publisher = {arXiv}, + urldate = {2024-09-23}, + abstract = {Causal disentanglement aims to uncover a representation of data using latent variables that are interrelated through a causal model. Such a representation is identifiable if the latent model that explains the data is unique. In this paper, we focus on the scenario where unpaired observational and interventional data are available, with each intervention changing the mechanism of a latent variable. When the causal variables are fully observed, statistically consistent algorithms have been developed to identify the causal model under faithfulness assumptions. 
We here show that identifiability can still be achieved with unobserved causal variables, given a generalized notion of faithfulness. Our results guarantee that we can recover the latent causal model up to an equivalence class and predict the effect of unseen combinations of interventions, in the limit of infinite data. We implement our causal disentanglement framework by developing an autoencoding variational Bayes algorithm and apply it to the problem of predicting combinatorial perturbation effects in genomics.}, + archiveprefix = {arXiv}, + langid = {english}, + keywords = {Computer Science - Machine Learning,Mathematics - Statistics Theory,Statistics - Machine Learning,Statistics - Methodology}, + file = {/Users/hhakem/Zotero/storage/XL4VFS94/Zhang et al. - 2023 - Identifiability Guarantees for Causal Disentanglem.pdf} +} + +@inproceedings{zhouLearningDeepFeatures2016, + title = {Learning {{Deep Features}} for {{Discriminative Localization}}}, + booktitle = {2016 {{IEEE Conference}} on {{Computer Vision}} and {{Pattern Recognition}} ({{CVPR}})}, + author = {Zhou, Bolei and Khosla, Aditya and Lapedriza, Agata and Oliva, Aude and Torralba, Antonio}, + year = {2016}, + month = jun, + pages = {2921--2929}, + issn = {1063-6919}, + doi = {10.1109/CVPR.2016.319}, + urldate = {2024-07-08}, + abstract = {In this work, we revisit the global average pooling layer proposed in [13], and shed light on how it explicitly enables the convolutional neural network (CNN) to have remarkable localization ability despite being trained on imagelevel labels. While this technique was previously proposed as a means for regularizing training, we find that it actually builds a generic localizable deep representation that exposes the implicit attention of CNNs on an image. Despite the apparent simplicity of global average pooling, we are able to achieve 37.1\% top-5 error for object localization on ILSVRC 2014 without training on any bounding box annotation. We demonstrate in a variety of experiments that our network is able to localize the discriminative image regions despite just being trained for solving classification task1.}, + keywords = {Computer vision,Detectors,Neural networks,Object recognition,Spatial resolution,Training,UnRead,Visualization}, + file = {/Users/hhakem/Zotero/storage/5JBQBZV4/Zhou et al. - 2016 - Learning Deep Features for Discriminative Localiza.pdf;/Users/hhakem/Zotero/storage/7FGCGFP2/7780688.html} +} + +@misc{zhuUnpairedImagetoImageTranslation2020, + title = {Unpaired {{Image-to-Image Translation}} Using {{Cycle-Consistent Adversarial Networks}}}, + author = {Zhu, Jun-Yan and Park, Taesung and Isola, Phillip and Efros, Alexei A.}, + year = {2020}, + month = aug, + number = {arXiv:1703.10593}, + eprint = {1703.10593}, + primaryclass = {cs}, + publisher = {arXiv}, + doi = {10.48550/arXiv.1703.10593}, + urldate = {2024-07-03}, + abstract = {Image-to-image translation is a class of vision and graphics problems where the goal is to learn the mapping between an input image and an output image using a training set of aligned image pairs. However, for many tasks, paired training data will not be available. We present an approach for learning to translate an image from a source domain \$X\$ to a target domain \$Y\$ in the absence of paired examples. Our goal is to learn a mapping \$G: X {\textbackslash}rightarrow Y\$ such that the distribution of images from \$G(X)\$ is indistinguishable from the distribution \$Y\$ using an adversarial loss. 
Because this mapping is highly under-constrained, we couple it with an inverse mapping \$F: Y {\textbackslash}rightarrow X\$ and introduce a cycle consistency loss to push \$F(G(X)) {\textbackslash}approx X\$ (and vice versa). Qualitative results are presented on several tasks where paired training data does not exist, including collection style transfer, object transfiguration, season transfer, photo enhancement, etc. Quantitative comparisons against several prior methods demonstrate the superiority of our approach.}, + archiveprefix = {arXiv}, + keywords = {Computer Science - Computer Vision and Pattern Recognition,Read}, + file = {/Users/hhakem/Zotero/storage/TXY4MEKG/Zhu et al. - 2020 - Unpaired Image-to-Image Translation using Cycle-Co.pdf;/Users/hhakem/Zotero/storage/M4BG9VW4/1703.html} +} diff --git a/manuscript/2024_12_attribution_broad_retreat/org-preamble.sty b/manuscript/2024_12_attribution_broad_retreat/org-preamble.sty new file mode 100644 index 0000000..ec1a8be --- /dev/null +++ b/manuscript/2024_12_attribution_broad_retreat/org-preamble.sty @@ -0,0 +1,3 @@ +%% \RequirePackage{amsmath,amssymb} + +\RequirePackage{hyperref} diff --git a/manuscript/2024_12_attribution_broad_retreat/poster.org b/manuscript/2024_12_attribution_broad_retreat/poster.org index 4ffe325..b756da6 100644 --- a/manuscript/2024_12_attribution_broad_retreat/poster.org +++ b/manuscript/2024_12_attribution_broad_retreat/poster.org @@ -1,6 +1,6 @@ #+startup: beamer #+Title: Exploring gene function and morphology using JUMP Cell Painting Consortium data: The JUMP toolkit -#+AUTHOR: $AUTHOR +#+AUTHOR: Hugo Hakem, Alán F. Muñoz, Shantanu Singh and Anne E. Carpenter #+OPTIONS: toc:nil num:nil date:nil tex:t title:nil author:t email:nil ^:nil #+LATEX_CLASS: beamerposter #+BEAMER_THEME: gemini @@ -13,17 +13,17 @@ #+cite_export: csl * Footer (Optional) :ignore: -#+BEAMER_HEADER: \footercontent{ -#+BEAMER_HEADER: \href{https://github.com/USER/PROJECT/poster.pdf}{https://github.com/USER/PROJECT/poster.pdf} \hfill -#+BEAMER_HEADER: Broad Retreat 2024, Boston, US \hfill -#+BEAMER_HEADER: \href{mailto:hhakem@broadinstitute.org}{hhakem@broadinstitute.org}} -# (can be left out to remove footer) +# #+BEAMER_HEADER: \footercontent{ +# #+BEAMER_HEADER: \href{https://github.com/USER/PROJECT/poster.pdf}{https://github.com/USER/PROJECT/poster.pdf} \hfill +# #+BEAMER_HEADER: Broad Retreat 2024, Boston, US \hfill +# #+BEAMER_HEADER: \href{mailto:hhakem@broadinstitute.org}{hhakem@broadinstitute.org}} +# # (can be left out to remove footer) * Logo (Optional) :ignore: # use this to include logos on the left and/or right side of the header: -# #+BEAMER_HEADER: \logoleft{\includegraphics[height=12cm]{figs/qr_hub.png}} # Outcommented -#+BEAMER_HEADER: \logoright{\includegraphics[height=3cm]{logos/broad_logo.png}} +# #+BEAMER_HEADER: \logoleft{\includegraphics [height=12cm]{figs/qr_hub.png}} # Outcommented +#+BEAMER_HEADER: \logoright{\includegraphics [height=5cm]{logos/broad_logo.png}} # # # ==================== # # # Body @@ -39,35 +39,30 @@ :BEAMER_col: 0.3 :END: *** Abstract -With the Cell Painting assay we quantify cell morphology using six dyes to stain eight cellular components: Nucleus, mitochondria, endoplasmic reticulum, nucleoli, cytoplasmic RNA, actin, golgi aparatus, and plasma membrane. After high-throughput fluorescence microscopy, image analysis algorithms then extract thousands of morphological features from each single cell’s image. 
By comparing of these “profiles” we can can uncover new relationships among genetic and chemical perturbations. +With the Cell Painting assay we quantify cell morphology using six dyes to stain eight cellular components: nucleus, mitochondria, endoplasmic reticulum, nucleoli, cytoplasmic RNA, actin, Golgi apparatus, and plasma membrane. After high-throughput fluorescence microscopy, image analysis algorithms then extract thousands of morphological features from each single cell’s image. By comparing these “profiles” we can uncover new relationships among genetic and chemical perturbations. -The JUMP-CP Consortium (Joint Undertaking for Morphological Profiling-Cell Painting) released the first public high-throughput dataset with over 140,000 genetic and chemical perturbations [cite:@chandrasekaranJUMPCellPainting2023]. -Here, we describe how this data can now be used to answer many biological questions. Researchers can pick any gene of interest and find what morphological phenotypes are induced when it is knocked-out or overexpressed and what genes produce a similar morphological profile when altered, uncovering functional relationships. Novel software tools developed for this dataset empower biologists to make discoveries of their own, and we show that mining this dataset can yield novel insights into current and relevant biological questions. +Here, we describe how this data can now be used to answer many biological questions. Researchers can pick any gene of interest and find what morphological phenotypes are induced when it is knocked out or overexpressed and what genes produce a similar morphological profile when altered, uncovering functional relationships. Novel software tools developed for this dataset empower biologists to make discoveries of their own, and we show that mining this dataset can yield novel insights into current and relevant biological questions. \vspace*{2cm} *** Goals :PROPERTIES: :BEAMER_env: exampleblock :END: -\heading{Device methods to interpret profile-based datasets to yield useful biological insight.} +\heading{Devise methods to interpret profile-based datasets to yield useful biological insight.} \heading{Develop a tool/workflow for biologists and computer scientists to discover genes that result in phenotypes similar to theirs.} \heading{Build a stepping stone for a universal and accessible framework against which biologists can validate cell phenotypes.} *** We use data from the Cell Painting assay, in which cellular components are stained using six dyes and imaged in five channels #+ATTR_LATEX: :width 1\textwidth -[[file:figs/cellpainting.png]] +[[../../workspace/analysis/figures/mask_size_dac.png]] \vspace*{2cm} -*** Morphological profiles were generated at a high-throughput scale +*** Morphological profiles were generated at a high-throughput scale We generated and preprocessed a database composed of thousands of cell painting experiments. #+ATTR_LATEX: :width 1\textwidth -[[file:figs/cell_painting_overview.png]] \vspace*{2cm} -*** Online version of this poster -#+ATTR_LATEX: :width 0.4\textwidth -[[file:figs/qr_poster.png]] ** @@latex:@@ :BMCOL: :PROPERTIES: @@ -75,22 +70,18 @@ We generated and preprocessed a database composed of thousands of cell painting :END: *** We generated a reference dataset for cells and features that indicates clustered groups of genes -After applying batch correction, it becomes possible to query individual genes and find similar profiles. Precomputed distances for morphological profiles are made available.
+After applying batch correction, it becomes possible to query individual genes and find similar profiles. Precomputed distances for morphological profiles are made available. -*** The JUMP consortium produced a massive set of morphological profiles +*** The JUMP consortium produced a massive set of morphological profiles #+ATTR_LATEX: :width 0.8\textwidth -[[file:figs/clustermap_cells_features.png]] *** We pre-calculated correlations between perturbations #+ATTR_LATEX: :width 0.8\textwidth -[[file:figs/clustermap_correlation.png]] A gallery to fetch all the available images for a given perturbation. -[[file:figs/gallery.png]] *** Which other perturbations produce a phenotype similar to my gene of interest? We developed an ecosystem of tools for scientists to find the perturbations most similar to theirs. -[[file:figs/web_interface.png]] ** @@latex:@@ :BMCOL: :PROPERTIES: @@ -98,7 +89,6 @@ We developed an ecosystem tools for scientist to find the perturbations most sim :END: *** Which features are the most significant for my gene of interest? Statistical values of all features for a given perturbation. -[[file:figs/features.png]] *** A standard analysis workflow has the following steps: 1. Find the most correlated and anticorrelated genes. @@ -113,43 +103,26 @@ Statistical values of all features for a given perturbation. :END: We compare images using tools that decompose the channels to focus on the most important features obtained from data mining -- [[https://github.com/broadinstitute/monorepo/tree/main/libs/jump_babel][broad_babel]]: Find the basic metadata for all perturbations -- [[https://github.com/broadinstitute/monorepo/tree/main/libs/jump_portrait][jump_portrait]]: Fetch images using perturbation identifiers. -- [[https://github.com/afermg/cp_measure][cp_measure]]: is a new and experimental tool to extract CellProfiler features directly from images. - -Examples of these tools and other workflows are available on the JUMP Hub. - -[[file:figs/jump_hub_images.png]] - +EXAMPLE OF A CITATION *** Available resources :PROPERTIES: :BEAMER_env: block :END: -| Dataset | ORF | CRISPR | -|---------------+----------------------+-------------------------| -| Description | Gene overexpression | Gene knock-out | -|---------------+----------------------+-------------------------| -| Genes ranking | [[https://broad.io/orf][broad.io/orf]] | [[https://broad.io/crispr][broad.io/crispr]] | -| Features | [[https://broad.io/orf_feature][broad.io/orf_feature]] | [[https://broad.io/crispr_feature][broad.io/crispr_feature]] | -| Gallery | [[https://broad.io/orf_gallery][broad.io/orf_gallery]] | [[https://broad.io/crispr_gallery][broad.io/crispr_gallery]] | - - +test block *** Conclusions :B_exampleblock: :PROPERTIES: :BEAMER_env: alertblock :END: -All data and tools for programatic and manual access to the data are made available so people can explore and train models [cite:@chandrasekaranImagebasedProfilingDrug2021]. Refer to [[http://broad.io/jump][broad.io/jump]] for more information.
\heading{The JUMP Cell Painting dataset can serve as a resource for obtaining candidate genes and gaining further insight into genes or proteins of interest.} \heading{Our querying systems can help both biologists and data scientists accelerate their biological discoveries by providing means to interpret features and by listing genes with similar phenotypes.} - - +[cite:@augustinDiffusionVisualCounterfactual2022] *** References :PROPERTIES: :BEAMER_env: block :END: # Reminder: You can export these as local-bib.bib using (citar-export-local-bib-file) #+print_bibliography: diff --git a/workspace/analysis/image_classifier_script.py b/workspace/analysis/image_classifier_script.py index 8b99669..c877dba 100755 --- a/workspace/analysis/image_classifier_script.py +++ b/workspace/analysis/image_classifier_script.py @@ -537,7 +537,7 @@ def create_dataset_fold(dataset_func, filename=f"StarGANv2_image_crop_active_fold_{fold}_"+"{epoch}-{step}", #-{train_acc_true:.2f}-{train_acc_fake:.2f}", #monitor="val_acc", #mode="max", - every_n_train_steps=50, + every_n_train_steps=100, enable_version_counter=True) #every_n_epochs=1)
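The last hunk raises every_n_train_steps from 50 to 100 in what appears to be a Lightning ModelCheckpoint callback: the neighboring keyword arguments (filename, monitor, mode, enable_version_counter) match that API. The full call is not visible in the diff, so the sketch below is only an assumption of how the modified configuration fits together; the import path, the dirpath value and the trainer wiring are illustrative placeholders, not code from the repository.

# Minimal sketch (assumed, not the repository's exact code): the checkpoint
# callback implied by the hunk above, written against lightning.pytorch.
from lightning.pytorch.callbacks import ModelCheckpoint

fold = 0  # placeholder; in image_classifier_script.py this comes from the cross-validation fold loop

checkpoint_callback = ModelCheckpoint(
    dirpath="lightning_checkpoints/",  # assumed output directory
    filename=f"StarGANv2_image_crop_active_fold_{fold}_" + "{epoch}-{step}",
    # monitor="val_acc",  # left commented out, as in the original hunk
    # mode="max",
    every_n_train_steps=100,      # save a checkpoint every 100 training steps (raised from 50)
    enable_version_counter=True,  # append -v1, -v2, ... instead of overwriting same-named files
)

# Typical wiring (assumed): pass the callback to the trainer that runs this fold.
# trainer = lightning.pytorch.Trainer(callbacks=[checkpoint_callback])
# trainer.fit(model, datamodule=datamodule)

Raising every_n_train_steps simply halves how often checkpoints are written, trading recovery granularity for less disk and I/O overhead; it does not change what is saved in each checkpoint.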