Add files via upload

MasterAI-EAM · Jun 4, 2023 · 807d66c · 807d66c
1 parent c149c5a
commit 807d66c
Showing 1 changed file with 181 additions and 0 deletions.
diff --git a/example.ipynb b/example.ipynb
@@ -0,0 +1,181 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "from transformers import LlamaTokenizer, LlamaForCausalLM\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "loading model, path: darwin/mix_training2\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "ad9276358c094ba89cae00cbd314226c",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "#load the model\n",
+    "model_path = 'darwin-7B'\n",
+    "print(\"loading model, path:\", model_path)\n",
+    "tokenizer = LlamaTokenizer.from_pretrained(model_path)\n",
+    "\n",
+    "model = LlamaForCausalLM.from_pretrained(\n",
+    "    model_path,\n",
+    "    load_in_8bit=False,\n",
+    "    torch_dtype=torch.float16,\n",
+    "    device_map=\"auto\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def generate_prompt(instruction, input=None):\n",
+    "    if input:\n",
+    "        return f\"\"\"The following is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n",
+    "    ### Instruction:\n",
+    "    {instruction}\n",
+    "    ### Input:\n",
+    "    {input}\n",
+    "    ### Response:\"\"\"\n",
+    "    else:\n",
+    "        return f\"\"\"The following is an instruction that describes a task. Write a response that appropriately completes the request.\n",
+    "    ### Instruction:\n",
+    "    {instruction}\n",
+    "    ### Response:\"\"\"\n",
+    "\n",
+    "def process_response(response):\n",
+    "    response = response.split('Response: ')[1].split('\\n')[0]\n",
+    "    return response\n",
+    "\n",
+    "def evaluate(instruction,\n",
+    "             input = None,\n",
+    "             temperature = 0.8,\n",
+    "             top_p = 0.75,\n",
+    "             top_k=40,\n",
+    "             do_sample=True,\n",
+    "             repetition_penalty=1.0,\n",
+    "             max_new_tokens=256,\n",
+    "             **kwargs):\n",
+    "    prompt = generate_prompt(instruction,input)\n",
+    "    input_ids = tokenizer(prompt, return_tensors=\"pt\").input_ids.to(\"cuda\")\n",
+    "    generated_ids = model.generate(\n",
+    "        input_ids, \n",
+    "        max_new_tokens=max_new_tokens, \n",
+    "        do_sample=do_sample, \n",
+    "        repetition_penalty=repetition_penalty, \n",
+    "        temperature=temperature, \n",
+    "        top_p=top_p, \n",
+    "        top_k=top_k,\n",
+    "        **kwargs\n",
+    "    )\n",
+    "    response = tokenizer.decode(generated_ids[0])\n",
+    "    response = process_response(response)\n",
+    "    return response\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Instruction: Write lipophilicity of given SMILES. CC(C)C(NC(=O)CN1C(=O)C(=CN=C1C2CCCCC2)NC(=O)OCc3ccccc3)C(=O)C(F)(F)F\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/halona/anaconda3/envs/darwin/lib/python3.10/site-packages/transformers/generation/utils.py:1253: UserWarning: You have modified the pretrained model configuration to control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. Please use a generation configuration file (see https://huggingface.co/docs/transformers/main_classes/text_generation)\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Response: 2.26\n",
+      "------------------\n",
+      "Instruction: Given compound, write its potential SELFIES. Decalin\n",
+      "Response: [C][C][C][C][C][C][C][C][C][Ring1][Branch1][C][Ring1][=Branch2][C][Ring1][=C][C][Ring1][#Branch2][C][Ring1][=C][C][Ring1][=C][Ring2][Ring1][Ring1][Ring2][Ring1][Ring1][C]\n",
+      "------------------\n",
+      "Instruction: What is water solubility expressed as a logarithm in mol/L of given compound in room temperature? Methyl acrylate\n",
+      "Response: -0.22\n",
+      "------------------\n",
+      "Instruction: Tell me if given composition has glass formation ability. Ni53.5B44C2.5\n",
+      "Response: No, Ni53.5B44C2.5 does not have glass formation ability.\n",
+      "------------------\n",
+      "Instruction: Is composition metal? InSb2S4Cl\n",
+      "Response: No, InSb2S4Cl is not metal.\n",
+      "------------------\n"
+     ]
+    }
+   ],
+   "source": [
+    "for instruction in [\n",
+    "    'Write lipophilicity of given SMILES. CC(C)C(NC(=O)CN1C(=O)C(=CN=C1C2CCCCC2)NC(=O)OCc3ccccc3)C(=O)C(F)(F)F',\n",
+    "    'Given compound, write its potential SELFIES. Decalin',\n",
+    "    'What is water solubility expressed as a logarithm in mol/L of given compound in room temperature? Methyl acrylate',\n",
+    "    'Tell me if given composition has glass formation ability. Ni53.5B44C2.5',\n",
+    "    'Is composition metal? InSb2S4Cl'\n",
+    "]:\n",
+    "    print(\"Instruction:\",instruction)\n",
+    "    print('Response:',evaluate(instruction))\n",
+    "    print('------------------')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "darwin",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.11"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}