From c5e4e7b0fab346c7ea43fd2154a0831ddc4c7f34 Mon Sep 17 00:00:00 2001 From: zingp <605156398@qq.com> Date: Fri, 21 May 2021 17:59:38 +0800 Subject: [PATCH] tfs --- .../transformers_bert.ipynb | 193 +++++++++++++++--- 1 file changed, 170 insertions(+), 23 deletions(-) diff --git a/huggingface_transformers/transformers_bert.ipynb b/huggingface_transformers/transformers_bert.ipynb index 2ad1cac..4d63ab4 100644 --- a/huggingface_transformers/transformers_bert.ipynb +++ b/huggingface_transformers/transformers_bert.ipynb @@ -673,7 +673,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -682,7 +682,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -691,7 +691,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -704,7 +704,7 @@ " 1, 1, 1, 1, 1, 1]])}" ] }, - "execution_count": 20, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -1512,7 +1512,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -1521,7 +1521,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -1530,7 +1530,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -1539,7 +1539,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -1548,39 +1548,144 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 8, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Truncation was not explicitely activated but `max_length` is provided a specific value, please use `truncation=True` to explicitely truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.\n", + "/usr/local/anaconda2/envs/pt-tf-env/lib/python3.6/site-packages/transformers/tokenization_utils_base.py:1770: FutureWarning: The `pad_to_max_length` argument is deprecated and will be removed in a future version, use `padding=True` or `padding='longest'` to pad to the longest sequence in the batch, or use `padding='max_length'` to pad to a max length. In this case, you can give a specific length with `max_length` (e.g. `max_length=45`) or leave max_length to None to pad to the maximal input size of the model (e.g. 
512 for Bert).\n", + " FutureWarning,\n" + ] + }, { "data": { "text/plain": [ "{'input_ids': tensor([[ 101, 2769, 812, 3341, 6407, 6407, 4281, 6873, 4638, 8815, 8716, 3563,\n", - " 1798, 1416, 102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}" + " 1798, 1416, 117, 1420, 6432, 2124, 7478, 2382, 1326, 2154, 1557, 102,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])}" ] }, - "execution_count": 38, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "inputs = tokernizer(\"我们来试试牛逼的bert模型吧\", return_tensors=\"pt\")\n", + "inputs = tokernizer(\"我们来试试牛逼的bert模型吧, 听说它非常厉害啊\",\n", + " max_length = 64, # maximum length of a sentence\n", + " pad_to_max_length=True, # Add [PAD]s\n", + " return_attention_mask = True,\n", + " return_tensors=\"pt\")\n", "inputs" ] }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'input_ids': tensor([[ 101, 2769, 812, 3341, 6407, 6407, 4281, 6873, 4638, 8815, 8716, 3563,\n", + " 1798, 1416, 117, 1420, 6432, 2124, 7478, 2382, 1326, 2154, 1557, 102,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])}" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "inp2 = tokernizer.encode_plus(\"我们来试试牛逼的bert模型吧, 听说它非常厉害啊\",\n", + " padding='max_length',\n", + " truncation=True,\n", + " max_length = 64, # maximum length of a sentence\n", + " pad_to_max_length=True, # Add [PAD]s\n", + " return_attention_mask = True,\n", + " return_tensors=\"pt\")\n", + "inp2" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 101, 2769, 812, 3341, 6407, 6407, 4281, 6873, 4638, 8815, 8716,\n", + " 3563, 1798, 1416, 117, 1420, 6432, 2124, 7478, 2382, 1326, 2154,\n", + " 1557, 102])" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ids = inp2[\"input_ids\"].data.cpu().numpy().reshape(-1)\n", + "ids[ids!=0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ 
+ "tokernizer.decode(inp2[\"input_ids\"].data.cpu().numpy().reshape(-1))" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [], + "source": [ + "?tokernizer.encode_plus" + ] + }, + { + "cell_type": "code", + "execution_count": 57, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[ 101, 2769, 812, 3341, 6407, 6407, 4281, 6873, 4638, 8815, 8716, 3563,\n", - " 1798, 1416, 102]])" + " 1798, 1416, 117, 1420, 6432, 2124, 7478, 2382, 1326, 2154, 1557, 102]])" ] }, - "execution_count": 39, + "execution_count": 57, "metadata": {}, "output_type": "execute_result" } @@ -1592,16 +1697,16 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 58, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])" + "tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])" ] }, - "execution_count": 40, + "execution_count": 58, "metadata": {}, "output_type": "execute_result" } @@ -1613,16 +1718,16 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 59, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])" + "tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])" ] }, - "execution_count": 41, + "execution_count": 59, "metadata": {}, "output_type": "execute_result" } @@ -5044,9 +5149,22 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 60, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "InvalidArgument", + "evalue": "[ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Got invalid dimensions for input: input.3 for the following indices\n index: 1 Got: 24 Expected: 15\n Please fix either the inputs or the model.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mInvalidArgument\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mout\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msession\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0mname1\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0minput_ids\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcpu\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnumpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msession\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_inputs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mtoken_type_ids\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcpu\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnumpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname2\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mattention_mask\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcpu\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnumpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + 
"\u001b[0;32m/usr/local/anaconda2/envs/pt-tf-env/lib/python3.6/site-packages/onnxruntime/capi/onnxruntime_inference_collection.py\u001b[0m in \u001b[0;36mrun\u001b[0;34m(self, output_names, input_feed, run_options)\u001b[0m\n\u001b[1;32m 186\u001b[0m \u001b[0moutput_names\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0moutput\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0moutput\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_outputs_meta\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 187\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 188\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sess\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput_names\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput_feed\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrun_options\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 189\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mC\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mEPFail\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 190\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_enable_fallback\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mInvalidArgument\u001b[0m: [ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Got invalid dimensions for input: input.3 for the following indices\n index: 1 Got: 24 Expected: 15\n Please fix either the inputs or the model." + ] + } + ], "source": [ "out = session.run(None, {name1: input_ids.cpu().numpy(), session.get_inputs()[2].name: token_type_ids.cpu().numpy(), name2: attention_mask.cpu().numpy() })" ] @@ -5092,6 +5210,35 @@ "#[-0.0022, 0.3962, -0.4054, ..., 0.3902, -0.1599, 0.0457]" ] }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [], + "source": [ + "?torch.onnx.export" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'1.3.0'" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "torch.__version__" + ] + }, { "cell_type": "code", "execution_count": null,