Skip to content

Commit

Permalink
Revert meta-formatting and merge main version with torchtext v0.9.1
Browse files: browse the repository at this point in the history
  • Loading branch information
MarcusFra committed Jun 2, 2021
1 parent c4117d3 commit 39bc545
Showing 1 changed file with 21 additions and 25 deletions.
46 changes: 21 additions & 25 deletions chapter5/Chapter 5.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
"execution_count": null,
"outputs": [],
"source": [
"!pip install torchtext~=0.7.0\n",
"!pip install torch~=1.6"
"!pip install torchtext==0.9.1\n",
"!pip install torch==1.8.1"
],
"metadata": {
"collapsed": false,
Expand All @@ -28,16 +28,12 @@
"metadata": {},
"outputs": [],
"source": [
"import torch \n",
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"import torch.optim as optim\n",
"import numpy as np\n",
"from torchtext import data \n",
"import spacy\n",
"import torchtext\n",
"from pathlib import Path\n",
"import pandas as pd\n",
"import spacy"
"import torch.nn as nn\n",
"import torch.optim as optim\n",
"from torchtext.legacy import data"
]
},
{
Expand All @@ -64,8 +60,8 @@
"source": [
"# You'll probably need to use the 'python' engine to load the CSV\n",
"# tweetsDF = pd.read_csv(\"training.1600000.processed.noemoticon.csv\", header=None)\n",
"tweetsDF = pd.read_csv(\"training.1600000.processed.noemoticon.csv\", \n",
"engine=\"python\", header=None)"
"tweetsDF = pd.read_csv(\"training.1600000.processed.noemoticon.csv\",\n",
" engine=\"python\", header=None)"
]
},
{
Expand Down Expand Up @@ -96,11 +92,10 @@
"outputs": [],
"source": [
"LABEL = data.LabelField()\n",
"TWEET = data.Field(tokenize='spacy', lower=True)\n",
"TWEET = data.Field(tokenize='spacy', tokenizer_language='en_core_web_sm', lower=True)\n",
"\n",
"fields = [('score',None), ('id',None),('date',None),('query',None),\n",
" ('name',None),\n",
" ('tweet', TWEET),('category',None),('label',LABEL)] "
"fields = [('score',None), ('id',None), ('date',None), ('query',None),\n",
" ('name',None), ('tweet', TWEET), ('category',None), ('label',LABEL)]"
]
},
{
Expand All @@ -116,7 +111,7 @@
"metadata": {},
"outputs": [],
"source": [
"twitterDataset = torchtext.data.TabularDataset(\n",
"twitterDataset = data.dataset.TabularDataset(\n",
" path=\"train-processed-sample.csv\", \n",
" format=\"CSV\", \n",
" fields=fields,\n",
Expand All @@ -140,7 +135,8 @@
}
],
"source": [
"(train, test, valid)=twitterDataset.split(split_ratio=[0.6,0.2,0.2],stratified=True, strata_field='label')\n",
"(train, test, valid) = twitterDataset.split(split_ratio=[0.6,0.2,0.2],\n",
" stratified=True, strata_field='label')\n",
"\n",
"(len(train),len(test),len(valid))"
]
Expand Down Expand Up @@ -184,11 +180,11 @@
"outputs": [],
"source": [
"train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(\n",
"(train, valid, test), \n",
"batch_size = 32,\n",
"device = device,\n",
"sort_key = lambda x: len(x.tweet),\n",
"sort_within_batch = False)"
" (train, valid, test),\n",
" batch_size = 32,\n",
" device = device,\n",
" sort_key = lambda x: len(x.tweet),\n",
" sort_within_batch = False)"
]
},
{
Expand Down Expand Up @@ -254,7 +250,7 @@
"criterion = nn.CrossEntropyLoss()\n",
"\n",
"def train(epochs, model, optimizer, criterion, train_iterator, valid_iterator):\n",
" for epoch in range(1, epochs + 1):\n",
" for epoch in range(1, epochs+1):\n",
" \n",
" training_loss = 0.0\n",
" valid_loss = 0.0\n",
Expand Down Expand Up @@ -347,7 +343,7 @@
" remaining = list(filter(lambda x: random.uniform(0,1) > p,words))\n",
" if len(remaining) == 0:\n",
" return [random.choice(words)]\n",
" else\n",
" else:\n",
" return remaining"
]
},
Expand Down

0 comments on commit 39bc545

Please sign in to comment.