Skip to content

Commit

Permalink
tfs
Browse files Browse the repository at this point in the history
  • Loading branch information
zingp committed May 21, 2021
1 parent 206155a commit c5e4e7b
Showing 1 changed file with 170 additions and 23 deletions.
193 changes: 170 additions & 23 deletions huggingface_transformers/transformers_bert.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -673,7 +673,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -682,7 +682,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -691,7 +691,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 18,
"metadata": {},
"outputs": [
{
Expand All @@ -704,7 +704,7 @@
" 1, 1, 1, 1, 1, 1]])}"
]
},
"execution_count": 20,
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -1512,7 +1512,7 @@
},
{
"cell_type": "code",
"execution_count": 34,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -1521,7 +1521,7 @@
},
{
"cell_type": "code",
"execution_count": 35,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -1530,7 +1530,7 @@
},
{
"cell_type": "code",
"execution_count": 36,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -1539,7 +1539,7 @@
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -1548,39 +1548,144 @@
},
{
"cell_type": "code",
"execution_count": 38,
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Truncation was not explicitely activated but `max_length` is provided a specific value, please use `truncation=True` to explicitely truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.\n",
"/usr/local/anaconda2/envs/pt-tf-env/lib/python3.6/site-packages/transformers/tokenization_utils_base.py:1770: FutureWarning: The `pad_to_max_length` argument is deprecated and will be removed in a future version, use `padding=True` or `padding='longest'` to pad to the longest sequence in the batch, or use `padding='max_length'` to pad to a max length. In this case, you can give a specific length with `max_length` (e.g. `max_length=45`) or leave max_length to None to pad to the maximal input size of the model (e.g. 512 for Bert).\n",
" FutureWarning,\n"
]
},
{
"data": {
"text/plain": [
"{'input_ids': tensor([[ 101, 2769, 812, 3341, 6407, 6407, 4281, 6873, 4638, 8815, 8716, 3563,\n",
" 1798, 1416, 102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}"
" 1798, 1416, 117, 1420, 6432, 2124, 7478, 2382, 1326, 2154, 1557, 102,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])}"
]
},
"execution_count": 38,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"inputs = tokernizer(\"我们来试试牛逼的bert模型吧\", return_tensors=\"pt\")\n",
"inputs = tokernizer(\"我们来试试牛逼的bert模型吧, 听说它非常厉害啊\",\n",
" max_length = 64, # maximum length of a sentence\n",
" padding='max_length', # add [PAD]s up to max_length (replaces deprecated pad_to_max_length)\n",
" truncation=True, # explicit truncation so max_length does not trigger a warning\n",
" return_attention_mask = True,\n",
" return_tensors=\"pt\")\n",
"inputs"
]
},
{
"cell_type": "code",
"execution_count": 39,
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'input_ids': tensor([[ 101, 2769, 812, 3341, 6407, 6407, 4281, 6873, 4638, 8815, 8716, 3563,\n",
" 1798, 1416, 117, 1420, 6432, 2124, 7478, 2382, 1326, 2154, 1557, 102,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])}"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"inp2 = tokernizer.encode_plus(\"我们来试试牛逼的bert模型吧, 听说它非常厉害啊\",\n",
" padding='max_length', # add [PAD]s up to max_length\n",
" truncation=True,\n",
" max_length = 64, # maximum length of a sentence\n",
" return_attention_mask = True,\n",
" return_tensors=\"pt\")\n",
"inp2"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 101, 2769, 812, 3341, 6407, 6407, 4281, 6873, 4638, 8815, 8716,\n",
" 3563, 1798, 1416, 117, 1420, 6432, 2124, 7478, 2382, 1326, 2154,\n",
" 1557, 102])"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ids = inp2[\"input_ids\"].data.cpu().numpy().reshape(-1)\n",
"ids[ids != tokernizer.pad_token_id]  # drop [PAD] positions without hard-coding id 0"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tokernizer.decode(inp2[\"input_ids\"].data.cpu().numpy().reshape(-1))"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [],
"source": [
"?tokernizer.encode_plus"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[ 101, 2769, 812, 3341, 6407, 6407, 4281, 6873, 4638, 8815, 8716, 3563,\n",
" 1798, 1416, 102]])"
" 1798, 1416, 117, 1420, 6432, 2124, 7478, 2382, 1326, 2154, 1557, 102]])"
]
},
"execution_count": 39,
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -1592,16 +1697,16 @@
},
{
"cell_type": "code",
"execution_count": 40,
"execution_count": 58,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])"
"tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])"
]
},
"execution_count": 40,
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -1613,16 +1718,16 @@
},
{
"cell_type": "code",
"execution_count": 41,
"execution_count": 59,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])"
"tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])"
]
},
"execution_count": 41,
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -5044,9 +5149,22 @@
},
{
"cell_type": "code",
"execution_count": 52,
"execution_count": 60,
"metadata": {},
"outputs": [],
"outputs": [
{
"ename": "InvalidArgument",
"evalue": "[ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Got invalid dimensions for input: input.3 for the following indices\n index: 1 Got: 24 Expected: 15\n Please fix either the inputs or the model.",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mInvalidArgument\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-60-485b3959a3b9>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mout\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msession\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0mname1\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0minput_ids\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcpu\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnumpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msession\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_inputs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mtoken_type_ids\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcpu\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnumpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname2\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mattention_mask\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcpu\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnumpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/usr/local/anaconda2/envs/pt-tf-env/lib/python3.6/site-packages/onnxruntime/capi/onnxruntime_inference_collection.py\u001b[0m in \u001b[0;36mrun\u001b[0;34m(self, output_names, input_feed, run_options)\u001b[0m\n\u001b[1;32m 186\u001b[0m \u001b[0moutput_names\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0moutput\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0moutput\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_outputs_meta\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 187\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 188\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sess\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput_names\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput_feed\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrun_options\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 189\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mC\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mEPFail\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 190\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_enable_fallback\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mInvalidArgument\u001b[0m: [ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Got invalid dimensions for input: input.3 for the following indices\n index: 1 Got: 24 Expected: 15\n Please fix either the inputs or the model."
]
}
],
"source": [
"out = session.run(None, {name1: input_ids.cpu().numpy(), session.get_inputs()[2].name: token_type_ids.cpu().numpy(), name2: attention_mask.cpu().numpy() })"
]
Expand Down Expand Up @@ -5092,6 +5210,35 @@
"#[-0.0022, 0.3962, -0.4054, ..., 0.3902, -0.1599, 0.0457]"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [],
"source": [
"?torch.onnx.export"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'1.3.0'"
]
},
"execution_count": 70,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"torch.__version__"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down

0 comments on commit c5e4e7b

Please sign in to comment.